xinference 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

This version of xinference has been flagged as a potentially problematic release.

Files changed (82)
  1. xinference/__init__.py +0 -1
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +99 -5
  4. xinference/client/restful/restful_client.py +98 -1
  5. xinference/core/chat_interface.py +2 -2
  6. xinference/core/model.py +85 -26
  7. xinference/core/scheduler.py +4 -4
  8. xinference/model/audio/chattts.py +40 -8
  9. xinference/model/audio/core.py +5 -2
  10. xinference/model/audio/cosyvoice.py +136 -0
  11. xinference/model/audio/model_spec.json +24 -0
  12. xinference/model/audio/model_spec_modelscope.json +27 -0
  13. xinference/model/flexible/launchers/__init__.py +1 -0
  14. xinference/model/flexible/launchers/image_process_launcher.py +70 -0
  15. xinference/model/image/core.py +3 -0
  16. xinference/model/image/model_spec.json +21 -0
  17. xinference/model/image/stable_diffusion/core.py +49 -7
  18. xinference/model/llm/llm_family.json +1065 -106
  19. xinference/model/llm/llm_family.py +26 -6
  20. xinference/model/llm/llm_family_csghub.json +39 -0
  21. xinference/model/llm/llm_family_modelscope.json +460 -47
  22. xinference/model/llm/pytorch/chatglm.py +243 -5
  23. xinference/model/llm/pytorch/cogvlm2.py +1 -1
  24. xinference/model/llm/sglang/core.py +7 -2
  25. xinference/model/llm/utils.py +78 -1
  26. xinference/model/llm/vllm/core.py +11 -0
  27. xinference/thirdparty/cosyvoice/__init__.py +0 -0
  28. xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
  29. xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
  30. xinference/thirdparty/cosyvoice/bin/train.py +136 -0
  31. xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
  32. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
  33. xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
  34. xinference/thirdparty/cosyvoice/cli/model.py +60 -0
  35. xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
  36. xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
  37. xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
  38. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  39. xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
  40. xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
  41. xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
  42. xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
  43. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  44. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
  45. xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
  46. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  47. xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
  48. xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
  49. xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
  50. xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
  51. xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
  52. xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
  53. xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
  54. xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
  55. xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
  56. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
  57. xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
  58. xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
  59. xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
  60. xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
  61. xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
  62. xinference/thirdparty/cosyvoice/utils/common.py +103 -0
  63. xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
  64. xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
  65. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
  66. xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
  67. xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
  68. xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
  69. xinference/web/ui/build/asset-manifest.json +3 -3
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/js/{main.95c1d652.js → main.2ef0cfaf.js} +3 -3
  72. xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +1 -0
  74. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/METADATA +18 -8
  75. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/RECORD +80 -36
  76. xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
  77. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
  78. /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.2ef0cfaf.js.LICENSE.txt} +0 -0
  79. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/LICENSE +0 -0
  80. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/WHEEL +0 -0
  81. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/entry_points.txt +0 -0
  82. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/top_level.txt +0 -0
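
The headline functional addition between 0.13.1 and 0.13.3 is the CosyVoice text-to-speech backend: the vendored package under xinference/thirdparty/cosyvoice/, the new xinference/model/audio/cosyvoice.py, and new audio model specs. Below is a minimal usage sketch against the Python RESTful client. It is hedged: it assumes a local server on the default port, assumes "CosyVoice-300M-SFT" is among the spec names added in model_spec.json (the spec contents are not shown in this diff), and assumes the 0.13-era client methods launch_model/get_model/speech.

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # assumes a locally running xinference server

    # Audio models are launched with model_type="audio"; the spec name is an
    # assumption based on the CosyVoice entries added in model_spec.json.
    uid = client.launch_model(model_name="CosyVoice-300M-SFT", model_type="audio")
    model = client.get_model(uid)

    # speech() is assumed to return the synthesized audio as bytes,
    # as with the existing ChatTTS audio models.
    audio = model.speech("Hello from CosyVoice.")
    with open("cosyvoice-demo.mp3", "wb") as f:
        f.write(audio)

The diff hunks that follow appear to come from xinference/model/llm/llm_family.json, the LLM registry (+1065 −106).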
@@ -819,7 +819,7 @@
  "none"
  ],
  "model_id": "THUDM/glm-4-9b-chat",
- "model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
+ "model_revision": "76f3474a854145aa4a9ed2612fee9bc8d4a8966b"
  },
  {
  "model_format": "ggufv2",
@@ -983,6 +983,65 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "codegeex4",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "the open-source version of the latest CodeGeeX4 model series",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/codegeex4-all-9b",
+ "model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K_L",
+ "Q8_0"
+ ],
+ "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
+ "model_id": "THUDM/codegeex4-all-9b-GGUF",
+ "model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CHATGLM3",
+ "system_prompt": "",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 2048,
@@ -1593,6 +1652,329 @@
  "none"
  ],
  "model_id": "meta-llama/Meta-Llama-3-70B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-mlx-unquantized"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Meta-Llama-3-70B-Instruct-GPTQ"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA3",
+ "system_prompt": "You are a helpful assistant.",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "intra_message_sep": "\n\n",
+ "inter_message_sep": "<|eot_id|>",
+ "stop_token_ids": [
+ 128001,
+ 128009
+ ],
+ "stop": [
+ "<|end_of_text|>",
+ "<|eot_id|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "llama-3.1",
+ "model_lang": [
+ "en",
+ "de",
+ "fr",
+ "it",
+ "pt",
+ "hi",
+ "es",
+ "th"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-8B"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "QuantFactory/Meta-Llama-3.1-8B-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-8B.{quantization}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-70B"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "llama-3.1-instruct",
+ "model_lang": [
+ "en",
+ "de",
+ "fr",
+ "it",
+ "pt",
+ "hi",
+ "es",
+ "th"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
+ "model_specs": [
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q3_K_L",
+ "IQ4_XS",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-8B-Instruct-{quantization}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "IQ2_M",
+ "IQ4_XS",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "quantization_parts": {
+ "Q5_K_M": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q6_K": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q8_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ]
+ },
+ "model_id": "lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-70B-Instruct-{quantization}.gguf",
+ "model_file_name_split_template": "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
  }
  ],
  "prompt_style": {
@@ -3732,19 +4114,219 @@
  "Q6_K",
  "Q8_0"
  ],
- "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
- "model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+ "model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA2",
+ "system_prompt": "[INST] ",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": " ",
+ "inter_message_sep": "<s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "mistral-instruct-v0.3",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
+ "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "fp16"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
+ "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA2",
+ "system_prompt": "[INST] ",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": " ",
+ "inter_message_sep": "<s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 1024000,
+ "model_name": "mistral-nemo-instruct",
+ "model_lang": [
+ "en",
+ "fr",
+ "de",
+ "es",
+ "it",
+ "pt",
+ "zh",
+ "ru",
+ "ja"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-Nemo-Instruct-2407",
+ "model_revision": "05b1e4f3e189ec1b5189fb3c973d4cf3369c27af"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
+ "model_revision": "1d85adc9e0fff0b8e4479a037bd75fe1346333ca"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit",
+ "model_revision": "1d2dacf18a486c745219317d1507441406bc7e25"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/mistral-nemo-instruct-2407-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "fp16"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
+ "model_file_name_template": "Mistral-Nemo-Instruct-2407.{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-bf16"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
  }
  ],
  "prompt_style": {
- "style_name": "LLAMA2",
- "system_prompt": "[INST] ",
+ "style_name": "mistral-nemo",
+ "system_prompt": "",
  "roles": [
  "[INST]",
  "[/INST]"
  ],
- "intra_message_sep": " ",
- "inter_message_sep": "<s>",
+ "intra_message_sep": "",
+ "inter_message_sep": "</s>",
  "stop_token_ids": [
  2
  ],
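
The hunk above also switches the prompt formatting from the LLAMA2 style to a new "mistral-nemo" style (empty system prompt, </s> between exchanges). As a rough illustration of how these prompt_style fields drive prompt assembly (a simplified sketch, not xinference's actual implementation in model/llm/utils.py):

    # Simplified sketch using the "mistral-nemo" prompt_style fields shown above.
    def render_prompt(messages,
                      roles=("[INST]", "[/INST]"),
                      inter_message_sep="</s>"):
        user_tag, assistant_tag = roles
        parts = []
        for msg in messages:
            if msg["role"] == "user":
                # User turns are wrapped in the role tags.
                parts.append(f"{user_tag} {msg['content']}{assistant_tag}")
            else:
                # A completed assistant turn is closed with the inter-message separator.
                parts.append(f" {msg['content']}{inter_message_sep}")
        return "".join(parts)

    print(render_prompt([
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
        {"role": "user", "content": "Summarize this diff."},
    ]))
    # [INST] Hi[/INST] Hello!</s>[INST] Summarize this diff.[/INST]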
@@ -3755,72 +4337,153 @@
  },
  {
  "version": 1,
- "context_length": 32768,
- "model_name": "mistral-instruct-v0.3",
+ "context_length": 131072,
+ "model_name": "mistral-large-instruct",
  "model_lang": [
- "en"
+ "en",
+ "fr",
+ "de",
+ "es",
+ "it",
+ "pt",
+ "zh",
+ "ru",
+ "ja",
+ "ko"
  ],
  "model_ability": [
  "chat"
  ],
- "model_description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.",
+ "model_description": "Mistral-Large-Instruct-2407 is an advanced dense Large Language Model (LLM) of 123B parameters with state-of-the-art reasoning, knowledge and coding capabilities.",
  "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
- "4-bit",
- "8-bit",
  "none"
  ],
- "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
- "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
+ "model_id": "mistralai/Mistral-Large-Instruct-2407"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Mistral-Large-Instruct-2407-bnb-4bit"
  },
  {
  "model_format": "gptq",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
  "Int4"
  ],
- "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ "model_id": "ModelCloud/Mistral-Large-Instruct-2407-gptq-4bit"
  },
  {
  "model_format": "awq",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
  "Int4"
  ],
- "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+ "model_id": "TechxGenus/Mistral-Large-Instruct-2407-AWQ"
  },
  {
  "model_format": "ggufv2",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
  "Q2_K",
  "Q3_K_S",
  "Q3_K_M",
  "Q3_K_L",
  "Q4_K_S",
- "Q4_K_M",
- "Q5_K_S",
- "Q5_K_M",
- "Q6_K",
- "Q8_0",
- "fp16"
+ "Q4_K_M"
  ],
- "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
- "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+ "model_id": "MaziyarPanahi/Mistral-Large-Instruct-2407-GGUF",
+ "model_file_name_template": "Mistral-Large-Instruct-2407.{quantization}.gguf",
+ "model_file_name_split_template": "Mixtral-8x22B-Instruct-v0.1.{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "Q3_K_L": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q3_K_M": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q3_K_S": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q4_K_M": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q4_K_S": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ]
+ }
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-bf16"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
  }
  ],
  "prompt_style": {
- "style_name": "LLAMA2",
- "system_prompt": "[INST] ",
+ "style_name": "mistral-nemo",
+ "system_prompt": "",
  "roles": [
  "[INST]",
  "[/INST]"
  ],
- "intra_message_sep": " ",
- "inter_message_sep": "<s>",
+ "intra_message_sep": "",
+ "inter_message_sep": "</s>",
  "stop_token_ids": [
  2
  ],
@@ -3869,6 +4532,24 @@
  ],
  "model_id": "bartowski/Codestral-22B-v0.1-GGUF",
  "model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
+ "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
+ "model_revision": "0399a53970663950d57010e61a2796af524a1588"
  }
  ]
  },
@@ -4609,6 +5290,61 @@
  "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
  "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
  }
+ ,
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
+ "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
+ "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
+ "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
+ "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
+ "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
+ "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
+ }
  ],
  "prompt_style": {
  "style_name": "CHATML",
@@ -5766,33 +6502,168 @@
  },
  {
  "version": 1,
- "context_length": 4096,
- "model_name": "Skywork-Math",
+ "context_length": 4096,
+ "model_name": "Skywork-Math",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Skywork is a series of large models developed by the Kunlun Group · Skywork team.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 13,
+ "quantizations": [
+ "8-bit",
+ "none"
+ ],
+ "model_id": "skywork/Skywork-13B-Math",
+ "model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "internlm2-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The second generation of the InternLM model, InternLM2.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "internlm/internlm2-chat-7b",
+ "model_revision": "2292b86b21cb856642782cebed0a453997453b1f"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 20,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "internlm/internlm2-chat-20b",
+ "model_revision": "b666125047cd98c5a7c85ca28720b44a06aed124"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "INTERNLM2",
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+ "roles": [
+ "<|im_start|>user",
+ "<|im_start|>assistant"
+ ],
+ "intra_message_sep": "<|im_end|>",
+ "stop_token_ids": [
+ 2,
+ 92542
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "internlm2.5-chat",
  "model_lang": [
  "en",
  "zh"
  ],
  "model_ability": [
- "generate"
+ "chat"
  ],
- "model_description": "Skywork is a series of large models developed by the Kunlun Group · Skywork team.",
+ "model_description": "InternLM2.5 series of the InternLM model.",
  "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 13,
+ "model_size_in_billions": 7,
  "quantizations": [
- "8-bit",
  "none"
  ],
- "model_id": "skywork/Skywork-13B-Math",
- "model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
+ "model_id": "internlm/internlm2_5-7b-chat",
+ "model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
+ "model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "internlm/internlm2_5-7b-chat-gguf",
+ "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
+ "model_revision": "d12097a867721978142a6048399f470a3d18beee"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
+ "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
  }
- ]
+ ],
+ "prompt_style": {
+ "style_name": "INTERNLM2",
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+ "roles": [
+ "<|im_start|>user",
+ "<|im_start|>assistant"
+ ],
+ "intra_message_sep": "<|im_end|>",
+ "stop_token_ids": [
+ 2,
+ 92542
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ }
  },
  {
  "version": 1,
- "context_length": 204800,
- "model_name": "internlm2-chat",
+ "context_length": 262144,
+ "model_name": "internlm2.5-chat-1m",
  "model_lang": [
  "en",
  "zh"
@@ -5800,7 +6671,7 @@
  "model_ability": [
  "chat"
  ],
- "model_description": "The second generation of the InternLM model, InternLM2.",
+ "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
  "model_specs": [
  {
  "model_format": "pytorch",
@@ -5808,17 +6679,34 @@
  "quantizations": [
  "none"
  ],
- "model_id": "internlm/internlm2-chat-7b",
- "model_revision": "2292b86b21cb856642782cebed0a453997453b1f"
+ "model_id": "internlm/internlm2_5-7b-chat-1m",
+ "model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
  },
  {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
  "quantizations": [
- "none"
+ "Int4"
  ],
- "model_id": "internlm/internlm2-chat-20b",
- "model_revision": "b666125047cd98c5a7c85ca28720b44a06aed124"
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
+ "model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
+ "model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
  }
  ],
  "prompt_style": {
@@ -6192,6 +7080,52 @@
  ],
  "model_id": "google/gemma-2-27b-it"
  },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "bartowski/gemma-2-9b-it-GGUF",
+ "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "bartowski/gemma-2-27b-it-GGUF",
+ "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
+ },
  {
  "model_format": "mlx",
  "model_size_in_billions": 9,
@@ -6809,6 +7743,15 @@
  "model_id": "CohereForAI/c4ai-command-r-v01",
  "model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 35,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
+ "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+ },
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 35,
@@ -6838,69 +7781,23 @@
  "model_id": "CohereForAI/c4ai-command-r-plus",
  "model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
  },
- {
- "model_format": "gptq",
- "model_size_in_billions": 104,
- "quantizations": [
- "Int4"
- ],
- "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
- "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
- }
- ],
- "prompt_style": {
- "style_name": "c4ai-command-r",
- "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
- "roles": [
- "<|USER_TOKEN|>",
- "<|CHATBOT_TOKEN|>"
- ],
- "intra_message_sep": "",
- "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
- "stop_token_ids": [
- 6,
- 255001
- ]
- }
- },
- {
- "version": 1,
- "context_length": 131072,
- "model_name": "c4ai-command-r-v01-4bit",
- "model_lang": [
- "en",
- "fr",
- "de",
- "es",
- "it",
- "pt",
- "ja",
- "ko",
- "zh",
- "ar"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
- "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 35,
+ "model_size_in_billions": 104,
  "quantizations": [
- "none"
+ "4-bit"
  ],
- "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
- "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+ "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
+ "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
  },
  {
- "model_format": "pytorch",
+ "model_format": "gptq",
  "model_size_in_billions": 104,
  "quantizations": [
- "none"
+ "Int4"
  ],
- "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
- "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
+ "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
+ "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
  }
  ],
  "prompt_style": {
@@ -7148,5 +8045,67 @@
  160132
  ]
  }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "csg-wukong-chat-v0.1",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "opencsg/csg-wukong-1B-chat-v0.1",
+ "model_revision": "2443c903d46074af0856e2ba11398dcd01d35536"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "RichardErkhov/opencsg_-_csg-wukong-1B-chat-v0.1-gguf",
+ "model_file_name_template": "csg-wukong-1B-chat-v0.1.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "NO_COLON_TWO",
+ "system_prompt": "<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant's response.</s>\n",
+ "roles": [
+ "<|user|>\n",
+ "<|assistant|>\n"
+ ],
+ "intra_message_sep": "</s>\n",
+ "inter_message_sep": "</s>\n",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
  }
  ]