xinference 0.14.1.post1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (194) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +15 -34
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/core/chat_interface.py +45 -10
  5. xinference/core/image_interface.py +9 -0
  6. xinference/core/model.py +8 -5
  7. xinference/core/scheduler.py +1 -2
  8. xinference/core/worker.py +49 -42
  9. xinference/deploy/cmdline.py +2 -2
  10. xinference/deploy/test/test_cmdline.py +7 -7
  11. xinference/model/audio/chattts.py +24 -9
  12. xinference/model/audio/core.py +8 -2
  13. xinference/model/audio/fish_speech.py +228 -0
  14. xinference/model/audio/model_spec.json +8 -0
  15. xinference/model/embedding/core.py +23 -1
  16. xinference/model/image/model_spec.json +2 -1
  17. xinference/model/image/model_spec_modelscope.json +2 -1
  18. xinference/model/image/stable_diffusion/core.py +49 -1
  19. xinference/model/llm/__init__.py +26 -27
  20. xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
  21. xinference/model/llm/llm_family.json +606 -1266
  22. xinference/model/llm/llm_family.py +16 -139
  23. xinference/model/llm/llm_family_modelscope.json +276 -313
  24. xinference/model/llm/lmdeploy/__init__.py +0 -0
  25. xinference/model/llm/lmdeploy/core.py +557 -0
  26. xinference/model/llm/memory.py +9 -9
  27. xinference/model/llm/sglang/core.py +2 -2
  28. xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
  29. xinference/model/llm/{pytorch → transformers}/cogvlm2.py +4 -45
  30. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  31. xinference/model/llm/{pytorch → transformers}/core.py +3 -10
  32. xinference/model/llm/{pytorch → transformers}/glm4v.py +2 -23
  33. xinference/model/llm/transformers/intern_vl.py +540 -0
  34. xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
  35. xinference/model/llm/{pytorch → transformers}/minicpmv25.py +2 -23
  36. xinference/model/llm/{pytorch → transformers}/minicpmv26.py +66 -41
  37. xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
  38. xinference/model/llm/{pytorch → transformers}/yi_vl.py +2 -24
  39. xinference/model/llm/utils.py +85 -70
  40. xinference/model/llm/vllm/core.py +110 -11
  41. xinference/model/utils.py +1 -95
  42. xinference/thirdparty/fish_speech/__init__.py +0 -0
  43. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  44. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  46. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  51. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  53. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  54. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  55. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  56. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  57. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  58. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  59. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  60. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  61. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  62. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  63. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  64. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  67. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  68. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  69. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  73. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  74. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  75. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  76. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  77. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  78. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  79. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  83. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  84. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  85. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  86. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  87. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  88. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  89. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  90. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  91. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  92. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  93. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  94. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  95. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  96. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  97. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  98. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  99. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  100. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  101. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  102. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  103. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  104. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  105. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  107. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  108. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  109. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  110. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  111. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  112. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  113. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  114. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  115. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  116. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  117. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  118. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  122. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  123. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  124. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  126. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  127. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  128. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  129. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  130. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  131. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  132. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  133. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  134. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  135. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  136. xinference/thirdparty/internvl/__init__.py +0 -0
  137. xinference/thirdparty/internvl/conversation.py +393 -0
  138. xinference/thirdparty/omnilmm/model/utils.py +16 -1
  139. xinference/web/ui/build/asset-manifest.json +3 -3
  140. xinference/web/ui/build/index.html +1 -1
  141. xinference/web/ui/build/static/js/main.661c7b0a.js +3 -0
  142. xinference/web/ui/build/static/js/{main.17ca0398.js.map → main.661c7b0a.js.map} +1 -1
  143. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
  156. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/METADATA +22 -13
  157. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/RECORD +170 -79
  158. xinference/locale/utils.py +0 -39
  159. xinference/locale/zh_CN.json +0 -26
  160. xinference/model/llm/ggml/tools/__init__.py +0 -15
  161. xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
  162. xinference/model/llm/ggml/tools/gguf.py +0 -884
  163. xinference/model/llm/pytorch/__init__.py +0 -13
  164. xinference/model/llm/pytorch/baichuan.py +0 -81
  165. xinference/model/llm/pytorch/falcon.py +0 -138
  166. xinference/model/llm/pytorch/intern_vl.py +0 -352
  167. xinference/model/llm/pytorch/vicuna.py +0 -69
  168. xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
  169. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
  170. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  171. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
  172. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
  173. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
  174. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
  175. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
  176. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
  177. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
  178. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
  179. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
  180. xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
  181. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
  182. /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
  183. /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
  184. /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
  185. /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
  186. /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
  187. /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
  188. /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
  189. /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
  190. /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  191. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,172 @@
1
+ # -*- coding: utf-8 -*-
2
+ """基本类
3
+ 中文字符类
4
+ 中文数字/数位类
5
+ 中文数字类
6
+ 中文数位类
7
+ 中文数字系统类
8
+ 中文数学符号类
9
+ *中文其他符号类
10
+ """
11
+
12
+ __author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
13
+ __data__ = "2019-05-02"
14
+
15
+ from fish_speech.text.chn_text_norm.basic_constant import NUMBERING_TYPES
16
+
17
+
18
+ class ChineseChar(object):
19
+ """
20
+ 中文字符
21
+ 每个字符对应简体和繁体,
22
+ e.g. 简体 = '负', 繁体 = '負'
23
+ 转换时可转换为简体或繁体
24
+ """
25
+
26
+ def __init__(self, simplified, traditional):
27
+ self.simplified = simplified
28
+ self.traditional = traditional
29
+ self.__repr__ = self.__str__
30
+
31
+ def __str__(self):
32
+ return self.simplified or self.traditional or None
33
+
34
+ def __repr__(self):
35
+ return self.__str__()
36
+
37
+
38
+ class ChineseNumberUnit(ChineseChar):
39
+ """
40
+ 中文数字/数位字符
41
+ 每个字符除繁简体外还有一个额外的大写字符
42
+ e.g. '陆' 和 '陸'
43
+ """
44
+
45
+ def __init__(self, power, simplified, traditional, big_s, big_t):
46
+ super(ChineseNumberUnit, self).__init__(simplified, traditional)
47
+ self.power = power
48
+ self.big_s = big_s
49
+ self.big_t = big_t
50
+
51
+ def __str__(self):
52
+ return "10^{}".format(self.power)
53
+
54
+ @classmethod
55
+ def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small_unit=False):
56
+
57
+ if small_unit:
58
+ return ChineseNumberUnit(
59
+ power=index + 1,
60
+ simplified=value[0],
61
+ traditional=value[1],
62
+ big_s=value[1],
63
+ big_t=value[1],
64
+ )
65
+ elif numbering_type == NUMBERING_TYPES[0]:
66
+ return ChineseNumberUnit(
67
+ power=index + 8,
68
+ simplified=value[0],
69
+ traditional=value[1],
70
+ big_s=value[0],
71
+ big_t=value[1],
72
+ )
73
+ elif numbering_type == NUMBERING_TYPES[1]:
74
+ return ChineseNumberUnit(
75
+ power=(index + 2) * 4,
76
+ simplified=value[0],
77
+ traditional=value[1],
78
+ big_s=value[0],
79
+ big_t=value[1],
80
+ )
81
+ elif numbering_type == NUMBERING_TYPES[2]:
82
+ return ChineseNumberUnit(
83
+ power=pow(2, index + 3),
84
+ simplified=value[0],
85
+ traditional=value[1],
86
+ big_s=value[0],
87
+ big_t=value[1],
88
+ )
89
+ else:
90
+ raise ValueError(
91
+ "Counting type should be in {0} ({1} provided).".format(
92
+ NUMBERING_TYPES, numbering_type
93
+ )
94
+ )
95
+
96
+
97
+ class ChineseNumberDigit(ChineseChar):
98
+ """
99
+ 中文数字字符
100
+ """
101
+
102
+ def __init__(
103
+ self, value, simplified, traditional, big_s, big_t, alt_s=None, alt_t=None
104
+ ):
105
+ super(ChineseNumberDigit, self).__init__(simplified, traditional)
106
+ self.value = value
107
+ self.big_s = big_s
108
+ self.big_t = big_t
109
+ self.alt_s = alt_s
110
+ self.alt_t = alt_t
111
+
112
+ def __str__(self):
113
+ return str(self.value)
114
+
115
+ @classmethod
116
+ def create(cls, i, v):
117
+ return ChineseNumberDigit(i, v[0], v[1], v[2], v[3])
118
+
119
+
120
+ class ChineseMath(ChineseChar):
121
+ """
122
+ 中文数位字符
123
+ """
124
+
125
+ def __init__(self, simplified, traditional, symbol, expression=None):
126
+ super(ChineseMath, self).__init__(simplified, traditional)
127
+ self.symbol = symbol
128
+ self.expression = expression
129
+ self.big_s = simplified
130
+ self.big_t = traditional
131
+
132
+
133
+ CC, CNU, CND, CM = ChineseChar, ChineseNumberUnit, ChineseNumberDigit, ChineseMath
134
+
135
+
136
+ class NumberSystem(object):
137
+ """
138
+ 中文数字系统
139
+ """
140
+
141
+ pass
142
+
143
+
144
+ class MathSymbol(object):
145
+ """
146
+ 用于中文数字系统的数学符号 (繁/简体), e.g.
147
+ positive = ['正', '正']
148
+ negative = ['负', '負']
149
+ point = ['点', '點']
150
+ """
151
+
152
+ def __init__(self, positive, negative, point):
153
+ self.positive = positive
154
+ self.negative = negative
155
+ self.point = point
156
+
157
+ def __iter__(self):
158
+ for v in self.__dict__.values():
159
+ yield v
160
+
161
+
162
+ # class OtherSymbol(object):
163
+ # """
164
+ # 其他符号
165
+ # """
166
+ #
167
+ # def __init__(self, sil):
168
+ # self.sil = sil
169
+ #
170
+ # def __iter__(self):
171
+ # for v in self.__dict__.values():
172
+ # yield v
@@ -0,0 +1,30 @@
1
+ # -*- coding: utf-8 -*-
2
+ """基本常量
3
+ 中文数字/数位/符号字符常量
4
+ """
5
+
6
+ __author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
7
+ __data__ = "2019-05-02"
8
+
9
+ CHINESE_DIGIS = "零一二三四五六七八九"
10
+ BIG_CHINESE_DIGIS_SIMPLIFIED = "零壹贰叁肆伍陆柒捌玖"
11
+ BIG_CHINESE_DIGIS_TRADITIONAL = "零壹貳參肆伍陸柒捌玖"
12
+ SMALLER_BIG_CHINESE_UNITS_SIMPLIFIED = "十百千万"
13
+ SMALLER_BIG_CHINESE_UNITS_TRADITIONAL = "拾佰仟萬"
14
+ LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED = "亿兆京垓秭穰沟涧正载"
15
+ LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL = "億兆京垓秭穰溝澗正載"
16
+ SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED = "十百千万"
17
+ SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL = "拾佰仟萬"
18
+
19
+ ZERO_ALT = "〇"
20
+ ONE_ALT = "幺"
21
+ TWO_ALTS = ["两", "兩"]
22
+
23
+ POSITIVE = ["正", "正"]
24
+ NEGATIVE = ["负", "負"]
25
+ POINT = ["点", "點"]
26
+ # PLUS = [u'加', u'加']
27
+ # SIL = [u'杠', u'槓']
28
+
29
+ # 中文数字系统类型
30
+ NUMBERING_TYPES = ["low", "mid", "high"]
@@ -0,0 +1,342 @@
1
+ # -*- coding: utf-8 -*-
2
+ """基本方法
3
+ 创建中文数字系统 方法
4
+ 中文字符串 <=> 数字串 方法
5
+ 数字串 <=> 中文字符串 方法
6
+ """
7
+
8
+ __author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
9
+ __data__ = "2019-05-02"
10
+
11
+ from fish_speech.text.chn_text_norm.basic_class import *
12
+ from fish_speech.text.chn_text_norm.basic_constant import *
13
+
14
+
15
+ def create_system(numbering_type=NUMBERING_TYPES[1]):
16
+ """
17
+ 根据数字系统类型返回创建相应的数字系统,默认为 mid
18
+ NUMBERING_TYPES = ['low', 'mid', 'high']: 中文数字系统类型
19
+ low: '兆' = '亿' * '十' = $10^{9}$, '京' = '兆' * '十', etc.
20
+ mid: '兆' = '亿' * '万' = $10^{12}$, '京' = '兆' * '万', etc.
21
+ high: '兆' = '亿' * '亿' = $10^{16}$, '京' = '兆' * '兆', etc.
22
+ 返回对应的数字系统
23
+ """
24
+
25
+ # chinese number units of '亿' and larger
26
+ all_larger_units = zip(
27
+ LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED,
28
+ LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL,
29
+ )
30
+ larger_units = [
31
+ CNU.create(i, v, numbering_type, False) for i, v in enumerate(all_larger_units)
32
+ ]
33
+ # chinese number units of '十, 百, 千, 万'
34
+ all_smaller_units = zip(
35
+ SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED,
36
+ SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL,
37
+ )
38
+ smaller_units = [
39
+ CNU.create(i, v, small_unit=True) for i, v in enumerate(all_smaller_units)
40
+ ]
41
+ # digis
42
+ chinese_digis = zip(
43
+ CHINESE_DIGIS,
44
+ CHINESE_DIGIS,
45
+ BIG_CHINESE_DIGIS_SIMPLIFIED,
46
+ BIG_CHINESE_DIGIS_TRADITIONAL,
47
+ )
48
+ digits = [CND.create(i, v) for i, v in enumerate(chinese_digis)]
49
+ digits[0].alt_s, digits[0].alt_t = ZERO_ALT, ZERO_ALT
50
+ digits[1].alt_s, digits[1].alt_t = ONE_ALT, ONE_ALT
51
+ digits[2].alt_s, digits[2].alt_t = TWO_ALTS[0], TWO_ALTS[1]
52
+
53
+ # symbols
54
+ positive_cn = CM(POSITIVE[0], POSITIVE[1], "+", lambda x: x)
55
+ negative_cn = CM(NEGATIVE[0], NEGATIVE[1], "-", lambda x: -x)
56
+ point_cn = CM(POINT[0], POINT[1], ".", lambda x, y: float(str(x) + "." + str(y)))
57
+ # sil_cn = CM(SIL[0], SIL[1], '-', lambda x, y: float(str(x) + '-' + str(y)))
58
+ system = NumberSystem()
59
+ system.units = smaller_units + larger_units
60
+ system.digits = digits
61
+ system.math = MathSymbol(positive_cn, negative_cn, point_cn)
62
+ # system.symbols = OtherSymbol(sil_cn)
63
+ return system
64
+
65
+
66
+ def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]):
67
+
68
+ def get_symbol(char, system):
69
+ for u in system.units:
70
+ if char in [u.traditional, u.simplified, u.big_s, u.big_t]:
71
+ return u
72
+ for d in system.digits:
73
+ if char in [
74
+ d.traditional,
75
+ d.simplified,
76
+ d.big_s,
77
+ d.big_t,
78
+ d.alt_s,
79
+ d.alt_t,
80
+ ]:
81
+ return d
82
+ for m in system.math:
83
+ if char in [m.traditional, m.simplified]:
84
+ return m
85
+
86
+ def string2symbols(chinese_string, system):
87
+ int_string, dec_string = chinese_string, ""
88
+ for p in [system.math.point.simplified, system.math.point.traditional]:
89
+ if p in chinese_string:
90
+ int_string, dec_string = chinese_string.split(p)
91
+ break
92
+ return [get_symbol(c, system) for c in int_string], [
93
+ get_symbol(c, system) for c in dec_string
94
+ ]
95
+
96
+ def correct_symbols(integer_symbols, system):
97
+ """
98
+ 一百八 to 一百八十
99
+ 一亿一千三百万 to 一亿 一千万 三百万
100
+ """
101
+
102
+ if integer_symbols and isinstance(integer_symbols[0], CNU):
103
+ if integer_symbols[0].power == 1:
104
+ integer_symbols = [system.digits[1]] + integer_symbols
105
+
106
+ if len(integer_symbols) > 1:
107
+ if isinstance(integer_symbols[-1], CND) and isinstance(
108
+ integer_symbols[-2], CNU
109
+ ):
110
+ integer_symbols.append(
111
+ CNU(integer_symbols[-2].power - 1, None, None, None, None)
112
+ )
113
+
114
+ result = []
115
+ unit_count = 0
116
+ for s in integer_symbols:
117
+ if isinstance(s, CND):
118
+ result.append(s)
119
+ unit_count = 0
120
+ elif isinstance(s, CNU):
121
+ current_unit = CNU(s.power, None, None, None, None)
122
+ unit_count += 1
123
+
124
+ if unit_count == 1:
125
+ result.append(current_unit)
126
+ elif unit_count > 1:
127
+ for i in range(len(result)):
128
+ if (
129
+ isinstance(result[-i - 1], CNU)
130
+ and result[-i - 1].power < current_unit.power
131
+ ):
132
+ result[-i - 1] = CNU(
133
+ result[-i - 1].power + current_unit.power,
134
+ None,
135
+ None,
136
+ None,
137
+ None,
138
+ )
139
+ return result
140
+
141
+ def compute_value(integer_symbols):
142
+ """
143
+ Compute the value.
144
+ When current unit is larger than previous unit, current unit * all previous units will be used as all previous units.
145
+ e.g. '两千万' = 2000 * 10000 not 2000 + 10000
146
+ """
147
+ value = [0]
148
+ last_power = 0
149
+ for s in integer_symbols:
150
+ if isinstance(s, CND):
151
+ value[-1] = s.value
152
+ elif isinstance(s, CNU):
153
+ value[-1] *= pow(10, s.power)
154
+ if s.power > last_power:
155
+ value[:-1] = list(map(lambda v: v * pow(10, s.power), value[:-1]))
156
+ last_power = s.power
157
+ value.append(0)
158
+ return sum(value)
159
+
160
+ system = create_system(numbering_type)
161
+ int_part, dec_part = string2symbols(chinese_string, system)
162
+ int_part = correct_symbols(int_part, system)
163
+ int_str = str(compute_value(int_part))
164
+ dec_str = "".join([str(d.value) for d in dec_part])
165
+ if dec_part:
166
+ return "{0}.{1}".format(int_str, dec_str)
167
+ else:
168
+ return int_str
169
+
170
+
171
+ def num2chn(
172
+ number_string,
173
+ numbering_type=NUMBERING_TYPES[1],
174
+ big=False,
175
+ traditional=False,
176
+ alt_zero=False,
177
+ alt_one=False,
178
+ alt_two=True,
179
+ use_zeros=True,
180
+ use_units=True,
181
+ ):
182
+
183
+ def get_value(value_string, use_zeros=True):
184
+
185
+ striped_string = value_string.lstrip("0")
186
+
187
+ # record nothing if all zeros
188
+ if not striped_string:
189
+ return []
190
+
191
+ # record one digits
192
+ elif len(striped_string) == 1:
193
+ if use_zeros and len(value_string) != len(striped_string):
194
+ return [system.digits[0], system.digits[int(striped_string)]]
195
+ else:
196
+ return [system.digits[int(striped_string)]]
197
+
198
+ # recursively record multiple digits
199
+ else:
200
+ result_unit = next(
201
+ u for u in reversed(system.units) if u.power < len(striped_string)
202
+ )
203
+ result_string = value_string[: -result_unit.power]
204
+ return (
205
+ get_value(result_string)
206
+ + [result_unit]
207
+ + get_value(striped_string[-result_unit.power :])
208
+ )
209
+
210
+ system = create_system(numbering_type)
211
+
212
+ int_dec = number_string.split(".")
213
+ if len(int_dec) == 1:
214
+ int_string = int_dec[0]
215
+ dec_string = ""
216
+ elif len(int_dec) == 2:
217
+ int_string = int_dec[0]
218
+ dec_string = int_dec[1]
219
+ else:
220
+ raise ValueError(
221
+ "invalid input num string with more than one dot: {}".format(number_string)
222
+ )
223
+
224
+ if use_units and len(int_string) > 1:
225
+ result_symbols = get_value(int_string)
226
+ else:
227
+ result_symbols = [system.digits[int(c)] for c in int_string]
228
+ dec_symbols = [system.digits[int(c)] for c in dec_string]
229
+ if dec_string:
230
+ result_symbols += [system.math.point] + dec_symbols
231
+
232
+ if alt_two:
233
+ liang = CND(
234
+ 2,
235
+ system.digits[2].alt_s,
236
+ system.digits[2].alt_t,
237
+ system.digits[2].big_s,
238
+ system.digits[2].big_t,
239
+ )
240
+ for i, v in enumerate(result_symbols):
241
+ if isinstance(v, CND) and v.value == 2:
242
+ next_symbol = (
243
+ result_symbols[i + 1] if i < len(result_symbols) - 1 else None
244
+ )
245
+ previous_symbol = result_symbols[i - 1] if i > 0 else None
246
+ if isinstance(next_symbol, CNU) and isinstance(
247
+ previous_symbol, (CNU, type(None))
248
+ ):
249
+ if next_symbol.power != 1 and (
250
+ (previous_symbol is None) or (previous_symbol.power != 1)
251
+ ):
252
+ result_symbols[i] = liang
253
+
254
+ # if big is True, '两' will not be used and `alt_two` has no impact on output
255
+ if big:
256
+ attr_name = "big_"
257
+ if traditional:
258
+ attr_name += "t"
259
+ else:
260
+ attr_name += "s"
261
+ else:
262
+ if traditional:
263
+ attr_name = "traditional"
264
+ else:
265
+ attr_name = "simplified"
266
+
267
+ result = "".join([getattr(s, attr_name) for s in result_symbols])
268
+
269
+ # if not use_zeros:
270
+ # result = result.strip(getattr(system.digits[0], attr_name))
271
+
272
+ if alt_zero:
273
+ result = result.replace(
274
+ getattr(system.digits[0], attr_name), system.digits[0].alt_s
275
+ )
276
+
277
+ if alt_one:
278
+ result = result.replace(
279
+ getattr(system.digits[1], attr_name), system.digits[1].alt_s
280
+ )
281
+
282
+ for i, p in enumerate(POINT):
283
+ if result.startswith(p):
284
+ return CHINESE_DIGIS[0] + result
285
+
286
+ # ^10, 11, .., 19
287
+ if (
288
+ len(result) >= 2
289
+ and result[1]
290
+ in [
291
+ SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED[0],
292
+ SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL[0],
293
+ ]
294
+ and result[0]
295
+ in [
296
+ CHINESE_DIGIS[1],
297
+ BIG_CHINESE_DIGIS_SIMPLIFIED[1],
298
+ BIG_CHINESE_DIGIS_TRADITIONAL[1],
299
+ ]
300
+ ):
301
+ result = result[1:]
302
+
303
+ return result
304
+
305
+
306
+ if __name__ == "__main__":
307
+
308
+ # 测试程序
309
+ all_chinese_number_string = (
310
+ CHINESE_DIGIS
311
+ + BIG_CHINESE_DIGIS_SIMPLIFIED
312
+ + BIG_CHINESE_DIGIS_TRADITIONAL
313
+ + LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED
314
+ + LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL
315
+ + SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED
316
+ + SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL
317
+ + ZERO_ALT
318
+ + ONE_ALT
319
+ + "".join(TWO_ALTS + POSITIVE + NEGATIVE + POINT)
320
+ )
321
+
322
+ print("num:", chn2num("一万零四百零三点八零五"))
323
+ print("num:", chn2num("一亿六点三"))
324
+ print("num:", chn2num("一亿零六点三"))
325
+ print("num:", chn2num("两千零一亿六点三"))
326
+ # print('num:', chn2num('一零零八六'))
327
+ print("txt:", num2chn("10260.03", alt_zero=True))
328
+ print("txt:", num2chn("20037.090", numbering_type="low", traditional=True))
329
+ print("txt:", num2chn("100860001.77", numbering_type="high", big=True))
330
+ print(
331
+ "txt:",
332
+ num2chn(
333
+ "059523810880",
334
+ alt_one=True,
335
+ alt_two=False,
336
+ use_lzeros=True,
337
+ use_rzeros=True,
338
+ use_units=False,
339
+ ),
340
+ )
341
+
342
+ print(all_chinese_number_string)
@@ -0,0 +1,32 @@
1
+ # -*- coding: utf-8 -*-
2
+ """CARDINAL类 (包含小数DECIMAL类)
3
+ 纯数 <=> 中文字符串 方法
4
+ 中文字符串 <=> 纯数 方法
5
+ """
6
+
7
+ __author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
8
+ __data__ = "2019-05-03"
9
+
10
+ from fish_speech.text.chn_text_norm.basic_util import *
11
+
12
+
13
+ class Cardinal:
14
+ """
15
+ CARDINAL类
16
+ """
17
+
18
+ def __init__(self, cardinal=None, chntext=None):
19
+ self.cardinal = cardinal
20
+ self.chntext = chntext
21
+
22
+ def chntext2cardinal(self):
23
+ return chn2num(self.chntext)
24
+
25
+ def cardinal2chntext(self):
26
+ return num2chn(self.cardinal)
27
+
28
+
29
+ if __name__ == "__main__":
30
+
31
+ # 测试程序
32
+ print(Cardinal(cardinal="21357.230").cardinal2chntext())
@@ -0,0 +1,75 @@
1
+ # -*- coding: utf-8 -*-
2
+ """DATE类
3
+ 日期 <=> 中文字符串 方法
4
+ 中文字符串 <=> 日期 方法
5
+ """
6
+
7
+ __author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
8
+ __data__ = "2019-05-07"
9
+
10
+ from fish_speech.text.chn_text_norm.cardinal import Cardinal
11
+ from fish_speech.text.chn_text_norm.digit import Digit
12
+
13
+
14
+ class Date:
15
+ """
16
+ DATE类
17
+ """
18
+
19
+ def __init__(self, date=None, chntext=None):
20
+ self.date = date
21
+ self.chntext = chntext
22
+
23
+ # def chntext2date(self):
24
+ # chntext = self.chntext
25
+ # try:
26
+ # year, other = chntext.strip().split('年', maxsplit=1)
27
+ # year = Digit(chntext=year).digit2chntext() + '年'
28
+ # except ValueError:
29
+ # other = chntext
30
+ # year = ''
31
+ # if other:
32
+ # try:
33
+ # month, day = other.strip().split('月', maxsplit=1)
34
+ # month = Cardinal(chntext=month).chntext2cardinal() + '月'
35
+ # except ValueError:
36
+ # day = chntext
37
+ # month = ''
38
+ # if day:
39
+ # day = Cardinal(chntext=day[:-1]).chntext2cardinal() + day[-1]
40
+ # else:
41
+ # month = ''
42
+ # day = ''
43
+ # date = year + month + day
44
+ # self.date = date
45
+ # return self.date
46
+
47
+ def date2chntext(self):
48
+ date = self.date
49
+ try:
50
+ year, other = date.strip().split("年", maxsplit=1)
51
+ year = Digit(digit=year).digit2chntext() + "年"
52
+ except ValueError:
53
+ other = date
54
+ year = ""
55
+ if other:
56
+ try:
57
+ month, day = other.strip().split("月", maxsplit=1)
58
+ month = Cardinal(cardinal=month).cardinal2chntext() + "月"
59
+ except ValueError:
60
+ day = date
61
+ month = ""
62
+ if day:
63
+ day = Cardinal(cardinal=day[:-1]).cardinal2chntext() + day[-1]
64
+ else:
65
+ month = ""
66
+ day = ""
67
+ chntext = year + month + day
68
+ self.chntext = chntext
69
+ return self.chntext
70
+
71
+
72
+ if __name__ == "__main__":
73
+
74
+ # 测试
75
+ print(Date(date="09年3月16日").date2chntext())
@@ -0,0 +1,32 @@
1
+ # -*- coding: utf-8 -*-
2
+ """DIGIT类
3
+ 数字串 <=> 中文字符串 方法
4
+ 中文字符串 <=> 数字串 方法
5
+ """
6
+
7
+ __author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
8
+ __data__ = "2019-05-03"
9
+
10
+ from fish_speech.text.chn_text_norm.basic_util import *
11
+
12
+
13
+ class Digit:
14
+ """
15
+ DIGIT类
16
+ """
17
+
18
+ def __init__(self, digit=None, chntext=None):
19
+ self.digit = digit
20
+ self.chntext = chntext
21
+
22
+ # def chntext2digit(self):
23
+ # return chn2num(self.chntext)
24
+
25
+ def digit2chntext(self):
26
+ return num2chn(self.digit, alt_two=False, use_units=False)
27
+
28
+
29
+ if __name__ == "__main__":
30
+
31
+ # 测试程序
32
+ print(Digit(digit="2016").digit2chntext())