xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (194) hide show
  1. xinference/_compat.py +51 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +209 -40
  4. xinference/client/restful/restful_client.py +7 -26
  5. xinference/conftest.py +1 -1
  6. xinference/constants.py +5 -0
  7. xinference/core/cache_tracker.py +1 -1
  8. xinference/core/chat_interface.py +8 -14
  9. xinference/core/event.py +1 -1
  10. xinference/core/image_interface.py +28 -0
  11. xinference/core/model.py +110 -31
  12. xinference/core/scheduler.py +37 -37
  13. xinference/core/status_guard.py +1 -1
  14. xinference/core/supervisor.py +17 -10
  15. xinference/core/utils.py +80 -22
  16. xinference/core/worker.py +17 -16
  17. xinference/deploy/cmdline.py +8 -16
  18. xinference/deploy/local.py +1 -1
  19. xinference/deploy/supervisor.py +1 -1
  20. xinference/deploy/utils.py +1 -1
  21. xinference/deploy/worker.py +1 -1
  22. xinference/model/audio/cosyvoice.py +86 -41
  23. xinference/model/audio/fish_speech.py +9 -9
  24. xinference/model/audio/model_spec.json +9 -9
  25. xinference/model/audio/whisper.py +4 -1
  26. xinference/model/embedding/core.py +52 -31
  27. xinference/model/image/core.py +2 -1
  28. xinference/model/image/model_spec.json +16 -4
  29. xinference/model/image/model_spec_modelscope.json +16 -4
  30. xinference/model/image/sdapi.py +136 -0
  31. xinference/model/image/stable_diffusion/core.py +164 -19
  32. xinference/model/llm/__init__.py +29 -11
  33. xinference/model/llm/llama_cpp/core.py +16 -33
  34. xinference/model/llm/llm_family.json +1011 -1296
  35. xinference/model/llm/llm_family.py +34 -53
  36. xinference/model/llm/llm_family_csghub.json +18 -35
  37. xinference/model/llm/llm_family_modelscope.json +981 -1122
  38. xinference/model/llm/lmdeploy/core.py +56 -88
  39. xinference/model/llm/mlx/core.py +46 -69
  40. xinference/model/llm/sglang/core.py +36 -18
  41. xinference/model/llm/transformers/chatglm.py +168 -306
  42. xinference/model/llm/transformers/cogvlm2.py +36 -63
  43. xinference/model/llm/transformers/cogvlm2_video.py +33 -223
  44. xinference/model/llm/transformers/core.py +55 -50
  45. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  46. xinference/model/llm/transformers/deepseek_vl.py +53 -96
  47. xinference/model/llm/transformers/glm4v.py +55 -111
  48. xinference/model/llm/transformers/intern_vl.py +39 -70
  49. xinference/model/llm/transformers/internlm2.py +32 -54
  50. xinference/model/llm/transformers/minicpmv25.py +22 -55
  51. xinference/model/llm/transformers/minicpmv26.py +158 -68
  52. xinference/model/llm/transformers/omnilmm.py +5 -28
  53. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  54. xinference/model/llm/transformers/qwen2_vl.py +234 -0
  55. xinference/model/llm/transformers/qwen_vl.py +34 -86
  56. xinference/model/llm/transformers/utils.py +32 -38
  57. xinference/model/llm/transformers/yi_vl.py +32 -72
  58. xinference/model/llm/utils.py +280 -554
  59. xinference/model/llm/vllm/core.py +161 -100
  60. xinference/model/rerank/core.py +41 -8
  61. xinference/model/rerank/model_spec.json +7 -0
  62. xinference/model/rerank/model_spec_modelscope.json +7 -1
  63. xinference/model/utils.py +1 -31
  64. xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
  65. xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
  66. xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
  67. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
  68. xinference/thirdparty/cosyvoice/cli/model.py +139 -26
  69. xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
  70. xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
  71. xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
  72. xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
  73. xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
  74. xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
  75. xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
  76. xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
  77. xinference/thirdparty/cosyvoice/utils/common.py +36 -0
  78. xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
  79. xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
  80. xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
  81. xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
  82. xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
  83. xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
  84. xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
  85. xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
  86. xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
  87. xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
  88. xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
  89. xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
  90. xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
  91. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +33 -0
  92. xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
  93. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
  94. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
  95. xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
  96. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  97. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  98. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  99. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  100. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  101. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  102. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  103. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  104. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  105. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
  107. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  108. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  109. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  110. xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
  111. xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
  112. xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
  113. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  114. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  115. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  116. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  117. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  118. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  122. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  123. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  124. xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  126. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  127. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  128. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  129. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  130. xinference/thirdparty/matcha/VERSION +1 -0
  131. xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
  132. xinference/thirdparty/matcha/hifigan/README.md +101 -0
  133. xinference/thirdparty/omnilmm/LICENSE +201 -0
  134. xinference/thirdparty/whisper/__init__.py +156 -0
  135. xinference/thirdparty/whisper/__main__.py +3 -0
  136. xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
  137. xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
  138. xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
  139. xinference/thirdparty/whisper/audio.py +157 -0
  140. xinference/thirdparty/whisper/decoding.py +826 -0
  141. xinference/thirdparty/whisper/model.py +314 -0
  142. xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
  143. xinference/thirdparty/whisper/normalizers/basic.py +76 -0
  144. xinference/thirdparty/whisper/normalizers/english.json +1741 -0
  145. xinference/thirdparty/whisper/normalizers/english.py +550 -0
  146. xinference/thirdparty/whisper/timing.py +386 -0
  147. xinference/thirdparty/whisper/tokenizer.py +395 -0
  148. xinference/thirdparty/whisper/transcribe.py +605 -0
  149. xinference/thirdparty/whisper/triton_ops.py +109 -0
  150. xinference/thirdparty/whisper/utils.py +316 -0
  151. xinference/thirdparty/whisper/version.py +1 -0
  152. xinference/types.py +14 -53
  153. xinference/web/ui/build/asset-manifest.json +6 -6
  154. xinference/web/ui/build/index.html +1 -1
  155. xinference/web/ui/build/static/css/{main.4bafd904.css → main.5061c4c3.css} +2 -2
  156. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  157. xinference/web/ui/build/static/js/main.754740c0.js +3 -0
  158. xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +2 -0
  159. xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
  160. xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
  161. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
  162. xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
  163. xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
  164. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
  165. xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
  166. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  167. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
  168. xinference/web/ui/node_modules/.package-lock.json +37 -0
  169. xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
  170. xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
  171. xinference/web/ui/node_modules/nunjucks/package.json +112 -0
  172. xinference/web/ui/package-lock.json +38 -0
  173. xinference/web/ui/package.json +1 -0
  174. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/METADATA +16 -10
  175. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/RECORD +179 -127
  176. xinference/model/llm/transformers/llama_2.py +0 -108
  177. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  178. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  179. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  180. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  181. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  182. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  183. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  184. xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
  185. xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
  186. xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
  187. xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
  188. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
  189. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
  190. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
  191. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,225 +0,0 @@
1
- import math
2
- from typing import Optional
3
-
4
- import torch
5
- import torch.nn.functional as F
6
- from torch import nn
7
-
8
-
9
- class Mish(nn.Module):
10
- def forward(self, x):
11
- return x * torch.tanh(F.softplus(x))
12
-
13
-
14
- class DiffusionEmbedding(nn.Module):
15
- """Diffusion Step Embedding"""
16
-
17
- def __init__(self, d_denoiser):
18
- super(DiffusionEmbedding, self).__init__()
19
- self.dim = d_denoiser
20
-
21
- def forward(self, x):
22
- device = x.device
23
- half_dim = self.dim // 2
24
- emb = math.log(10000) / (half_dim - 1)
25
- emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
26
- emb = x[:, None] * emb[None, :]
27
- emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
28
- return emb
29
-
30
-
31
- class LinearNorm(nn.Module):
32
- """LinearNorm Projection"""
33
-
34
- def __init__(self, in_features, out_features, bias=False):
35
- super(LinearNorm, self).__init__()
36
- self.linear = nn.Linear(in_features, out_features, bias)
37
-
38
- nn.init.xavier_uniform_(self.linear.weight)
39
- if bias:
40
- nn.init.constant_(self.linear.bias, 0.0)
41
-
42
- def forward(self, x):
43
- x = self.linear(x)
44
- return x
45
-
46
-
47
- class ConvNorm(nn.Module):
48
- """1D Convolution"""
49
-
50
- def __init__(
51
- self,
52
- in_channels,
53
- out_channels,
54
- kernel_size=1,
55
- stride=1,
56
- padding=None,
57
- dilation=1,
58
- bias=True,
59
- w_init_gain="linear",
60
- ):
61
- super(ConvNorm, self).__init__()
62
-
63
- if padding is None:
64
- assert kernel_size % 2 == 1
65
- padding = int(dilation * (kernel_size - 1) / 2)
66
-
67
- self.conv = nn.Conv1d(
68
- in_channels,
69
- out_channels,
70
- kernel_size=kernel_size,
71
- stride=stride,
72
- padding=padding,
73
- dilation=dilation,
74
- bias=bias,
75
- )
76
- nn.init.kaiming_normal_(self.conv.weight)
77
-
78
- def forward(self, signal):
79
- conv_signal = self.conv(signal)
80
-
81
- return conv_signal
82
-
83
-
84
- class ResidualBlock(nn.Module):
85
- """Residual Block"""
86
-
87
- def __init__(
88
- self,
89
- residual_channels,
90
- use_linear_bias=False,
91
- dilation=1,
92
- condition_channels=None,
93
- ):
94
- super(ResidualBlock, self).__init__()
95
- self.conv_layer = ConvNorm(
96
- residual_channels,
97
- 2 * residual_channels,
98
- kernel_size=3,
99
- stride=1,
100
- padding=dilation,
101
- dilation=dilation,
102
- )
103
-
104
- if condition_channels is not None:
105
- self.diffusion_projection = LinearNorm(
106
- residual_channels, residual_channels, use_linear_bias
107
- )
108
- self.condition_projection = ConvNorm(
109
- condition_channels, 2 * residual_channels, kernel_size=1
110
- )
111
-
112
- self.output_projection = ConvNorm(
113
- residual_channels, 2 * residual_channels, kernel_size=1
114
- )
115
-
116
- def forward(self, x, condition=None, diffusion_step=None):
117
- y = x
118
-
119
- if diffusion_step is not None:
120
- diffusion_step = self.diffusion_projection(diffusion_step).unsqueeze(-1)
121
- y = y + diffusion_step
122
-
123
- y = self.conv_layer(y)
124
-
125
- if condition is not None:
126
- condition = self.condition_projection(condition)
127
- y = y + condition
128
-
129
- gate, filter = torch.chunk(y, 2, dim=1)
130
- y = torch.sigmoid(gate) * torch.tanh(filter)
131
-
132
- y = self.output_projection(y)
133
- residual, skip = torch.chunk(y, 2, dim=1)
134
-
135
- return (x + residual) / math.sqrt(2.0), skip
136
-
137
-
138
- class WaveNet(nn.Module):
139
- def __init__(
140
- self,
141
- input_channels: Optional[int] = None,
142
- output_channels: Optional[int] = None,
143
- residual_channels: int = 512,
144
- residual_layers: int = 20,
145
- dilation_cycle: Optional[int] = 4,
146
- is_diffusion: bool = False,
147
- condition_channels: Optional[int] = None,
148
- ):
149
- super().__init__()
150
-
151
- # Input projection
152
- self.input_projection = None
153
- if input_channels is not None and input_channels != residual_channels:
154
- self.input_projection = ConvNorm(
155
- input_channels, residual_channels, kernel_size=1
156
- )
157
-
158
- if input_channels is None:
159
- input_channels = residual_channels
160
-
161
- self.input_channels = input_channels
162
-
163
- # Residual layers
164
- self.residual_layers = nn.ModuleList(
165
- [
166
- ResidualBlock(
167
- residual_channels=residual_channels,
168
- use_linear_bias=False,
169
- dilation=2 ** (i % dilation_cycle) if dilation_cycle else 1,
170
- condition_channels=condition_channels,
171
- )
172
- for i in range(residual_layers)
173
- ]
174
- )
175
-
176
- # Skip projection
177
- self.skip_projection = ConvNorm(
178
- residual_channels, residual_channels, kernel_size=1
179
- )
180
-
181
- # Output projection
182
- self.output_projection = None
183
- if output_channels is not None and output_channels != residual_channels:
184
- self.output_projection = ConvNorm(
185
- residual_channels, output_channels, kernel_size=1
186
- )
187
-
188
- if is_diffusion:
189
- self.diffusion_embedding = DiffusionEmbedding(residual_channels)
190
- self.mlp = nn.Sequential(
191
- LinearNorm(residual_channels, residual_channels * 4, False),
192
- Mish(),
193
- LinearNorm(residual_channels * 4, residual_channels, False),
194
- )
195
-
196
- self.apply(self._init_weights)
197
-
198
- def _init_weights(self, m):
199
- if isinstance(m, (nn.Conv1d, nn.Linear)):
200
- nn.init.trunc_normal_(m.weight, std=0.02)
201
- if getattr(m, "bias", None) is not None:
202
- nn.init.constant_(m.bias, 0)
203
-
204
- def forward(self, x, t=None, condition=None):
205
- if self.input_projection is not None:
206
- x = self.input_projection(x)
207
- x = F.silu(x)
208
-
209
- if t is not None:
210
- t = self.diffusion_embedding(t)
211
- t = self.mlp(t)
212
-
213
- skip = []
214
- for layer in self.residual_layers:
215
- x, skip_connection = layer(x, condition, t)
216
- skip.append(skip_connection)
217
-
218
- x = torch.sum(torch.stack(skip), dim=0) / math.sqrt(len(self.residual_layers))
219
- x = self.skip_projection(x)
220
-
221
- if self.output_projection is not None:
222
- x = F.silu(x)
223
- x = self.output_projection(x)
224
-
225
- return x
@@ -1,159 +0,0 @@
1
- import os
2
-
3
- os.environ["MODELSCOPE_CACHE"] = ".cache/"
4
-
5
- import string
6
- import time
7
- from threading import Lock
8
-
9
- import librosa
10
- import numpy as np
11
- import opencc
12
- import torch
13
- from faster_whisper import WhisperModel
14
-
15
- t2s_converter = opencc.OpenCC("t2s")
16
-
17
-
18
- def load_model(*, device="cuda"):
19
- model = WhisperModel(
20
- "medium",
21
- device=device,
22
- compute_type="float16",
23
- download_root="faster_whisper",
24
- )
25
- print("faster_whisper loaded!")
26
- return model
27
-
28
-
29
- @torch.no_grad()
30
- def batch_asr_internal(model: WhisperModel, audios, sr):
31
- resampled_audios = []
32
- for audio in audios:
33
-
34
- if isinstance(audio, np.ndarray):
35
- audio = torch.from_numpy(audio).float()
36
-
37
- if audio.dim() > 1:
38
- audio = audio.squeeze()
39
-
40
- assert audio.dim() == 1
41
- audio_np = audio.numpy()
42
- resampled_audio = librosa.resample(audio_np, orig_sr=sr, target_sr=16000)
43
- resampled_audios.append(resampled_audio)
44
-
45
- trans_results = []
46
-
47
- for resampled_audio in resampled_audios:
48
- segments, info = model.transcribe(
49
- resampled_audio,
50
- language=None,
51
- beam_size=5,
52
- initial_prompt="Punctuation is needed in any language.",
53
- )
54
- trans_results.append(list(segments))
55
-
56
- results = []
57
- for trans_res, audio in zip(trans_results, audios):
58
-
59
- duration = len(audio) / sr * 1000
60
- huge_gap = False
61
- max_gap = 0.0
62
-
63
- text = None
64
- last_tr = None
65
-
66
- for tr in trans_res:
67
- delta = tr.text.strip()
68
- if tr.id > 1:
69
- max_gap = max(tr.start - last_tr.end, max_gap)
70
- text += delta
71
- else:
72
- text = delta
73
-
74
- last_tr = tr
75
- if max_gap > 3.0:
76
- huge_gap = True
77
- break
78
-
79
- sim_text = t2s_converter.convert(text)
80
- results.append(
81
- {
82
- "text": sim_text,
83
- "duration": duration,
84
- "huge_gap": huge_gap,
85
- }
86
- )
87
-
88
- return results
89
-
90
-
91
- global_lock = Lock()
92
-
93
-
94
- def batch_asr(model, audios, sr):
95
- return batch_asr_internal(model, audios, sr)
96
-
97
-
98
- def is_chinese(text):
99
- return True
100
-
101
-
102
- def calculate_wer(text1, text2, debug=False):
103
- chars1 = remove_punctuation(text1)
104
- chars2 = remove_punctuation(text2)
105
-
106
- m, n = len(chars1), len(chars2)
107
-
108
- if m > n:
109
- chars1, chars2 = chars2, chars1
110
- m, n = n, m
111
-
112
- prev = list(range(m + 1)) # row 0 distance: [0, 1, 2, ...]
113
- curr = [0] * (m + 1)
114
-
115
- for j in range(1, n + 1):
116
- curr[0] = j
117
- for i in range(1, m + 1):
118
- if chars1[i - 1] == chars2[j - 1]:
119
- curr[i] = prev[i - 1]
120
- else:
121
- curr[i] = min(prev[i], curr[i - 1], prev[i - 1]) + 1
122
- prev, curr = curr, prev
123
-
124
- edits = prev[m]
125
- tot = max(len(chars1), len(chars2))
126
- wer = edits / tot
127
-
128
- if debug:
129
- print(" gt: ", chars1)
130
- print(" pred: ", chars2)
131
- print(" edits/tot = wer: ", edits, "/", tot, "=", wer)
132
-
133
- return wer
134
-
135
-
136
- def remove_punctuation(text):
137
- chinese_punctuation = (
138
- " \n\t”“!?。。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—"
139
- '‛""„‟…‧﹏'
140
- )
141
- all_punctuation = string.punctuation + chinese_punctuation
142
- translator = str.maketrans("", "", all_punctuation)
143
- text_without_punctuation = text.translate(translator)
144
- return text_without_punctuation
145
-
146
-
147
- if __name__ == "__main__":
148
- model = load_model()
149
- audios = [
150
- librosa.load("44100.wav", sr=44100)[0],
151
- librosa.load("lengyue.wav", sr=44100)[0],
152
- ]
153
- print(np.array(audios[0]))
154
- print(batch_asr(model, audios, 44100))
155
-
156
- start_time = time.time()
157
- for _ in range(10):
158
- print(batch_asr(model, audios, 44100))
159
- print("Time taken:", time.time() - start_time)
@@ -1,36 +0,0 @@
1
- import json
2
- from pathlib import Path
3
-
4
-
5
- def scan_folder(base_path):
6
- wav_lab_pairs = {}
7
-
8
- base = Path(base_path)
9
- for suf in ["wav", "lab"]:
10
- for f in base.rglob(f"*.{suf}"):
11
- relative_path = f.relative_to(base)
12
- parts = relative_path.parts
13
- print(parts)
14
- if len(parts) >= 3:
15
- character = parts[0]
16
- emotion = parts[1]
17
-
18
- if character not in wav_lab_pairs:
19
- wav_lab_pairs[character] = {}
20
- if emotion not in wav_lab_pairs[character]:
21
- wav_lab_pairs[character][emotion] = []
22
- wav_lab_pairs[character][emotion].append(str(f.name))
23
-
24
- return wav_lab_pairs
25
-
26
-
27
- def save_to_json(data, output_file):
28
- with open(output_file, "w", encoding="utf-8") as file:
29
- json.dump(data, file, ensure_ascii=False, indent=2)
30
-
31
-
32
- base_path = "ref_data"
33
- out_ref_file = "ref_data.json"
34
-
35
- wav_lab_pairs = scan_folder(base_path)
36
- save_to_json(wav_lab_pairs, out_ref_file)
@@ -1,55 +0,0 @@
1
- import os
2
- from pathlib import Path
3
-
4
- from pydub import AudioSegment
5
- from tqdm import tqdm
6
-
7
- from tools.file import AUDIO_EXTENSIONS, list_files
8
-
9
-
10
- def merge_and_delete_files(save_dir, original_files):
11
- save_path = Path(save_dir)
12
- audio_slice_files = list_files(
13
- path=save_dir, extensions=AUDIO_EXTENSIONS.union([".lab"]), recursive=True
14
- )
15
- audio_files = {}
16
- label_files = {}
17
- for file_path in tqdm(audio_slice_files, desc="Merging audio files"):
18
- rel_path = Path(file_path).relative_to(save_path)
19
- (save_path / rel_path.parent).mkdir(parents=True, exist_ok=True)
20
- if file_path.suffix == ".wav":
21
- prefix = rel_path.parent / file_path.stem.rsplit("-", 1)[0]
22
- if prefix == rel_path.parent / file_path.stem:
23
- continue
24
- audio = AudioSegment.from_wav(file_path)
25
- if prefix in audio_files.keys():
26
- audio_files[prefix] = audio_files[prefix] + audio
27
- else:
28
- audio_files[prefix] = audio
29
-
30
- elif file_path.suffix == ".lab":
31
- prefix = rel_path.parent / file_path.stem.rsplit("-", 1)[0]
32
- if prefix == rel_path.parent / file_path.stem:
33
- continue
34
- with open(file_path, "r", encoding="utf-8") as f:
35
- label = f.read()
36
- if prefix in label_files.keys():
37
- label_files[prefix] = label_files[prefix] + ", " + label
38
- else:
39
- label_files[prefix] = label
40
-
41
- for prefix, audio in audio_files.items():
42
- output_audio_path = save_path / f"{prefix}.wav"
43
- audio.export(output_audio_path, format="wav")
44
-
45
- for prefix, label in label_files.items():
46
- output_label_path = save_path / f"{prefix}.lab"
47
- with open(output_label_path, "w", encoding="utf-8") as f:
48
- f.write(label)
49
-
50
- for file_path in original_files:
51
- os.remove(file_path)
52
-
53
-
54
- if __name__ == "__main__":
55
- merge_and_delete_files("/made/by/spicysama/laziman", [__file__])
@@ -1 +0,0 @@
1
- {"version":3,"file":"static/css/main.4bafd904.css","mappings":"AAAA,WAKE,cAAe,CAJf,aAMF,CACA,4BAFE,kBAAmB,CAFnB,YAAa,CAFb,iBAAkB,CAClB,WAaF,CARA,iBAGE,SAAU,CAGV,YAAa,CAJb,QAMF,CACA,WACE,YAAa,CACb,6BACF,CACA,eAEE,kBAAmB,CADnB,YAEF,CACA,YAGE,eAAgB,CADhB,mBAAoB,CADpB,iBAAkB,CAGlB,UACF,CACA,GAEE,oBAAqB,CACrB,2BAA4B,CAF5B,mBAAoB,CAMpB,cAAe,CAHf,eAAgB,CAIhB,cAAiB,CAHjB,sBAAuB,CACvB,qBAGF,CACA,eACE,UAAW,CACX,eAAgB,CAChB,cACF,CACA,kBAOE,kBAAmB,CALnB,WAAY,CAGZ,YAAa,CACb,6BAA8B,CAH9B,UAAW,CAFX,iBAAkB,CAGlB,WAIF,CACA,iBAGE,wBAA6B,CAD7B,cAAiB,CADjB,SAGF,CACA,YAME,wBAAqB,CAHrB,iBAAkB,CADlB,WAAY,CADZ,UAMF,CACA,iBAEE,UAAc,CADd,cAAe,CAEf,iBAAkB,CAClB,aAAc,CACd,iBACF,CACA,SAEE,WAAY,CAIZ,6BAA8B,CAH9B,SAAU,CAFV,iBAAkB,CAGlB,UAIF,CACA,mBAFE,kBAAmB,CAFnB,YASF,CALA,UAEE,qBAAsB,CAEtB,WACF,CACA,cAEE,eAAgB,CADhB,eAEF,CACA,SACE,eACF,CACA,WACE,cACF,CACA,WAGE,qBAAsB,CADtB,YAAa,CAEb,WAAY,CACZ,iBAAkB,CAJlB,YAKF,CACA,aAIE,UAAW,CAHX,YAAa,CACb,6BAA8B,CAC9B,qBAEF,CACA,wBACE,cAAe,CACf,eACF,CACA,SAEE,cAAe,CACf,eAAgB,CAEhB,sBAAuB,CADvB,kBAAmB,CAHnB,WAKF,CACA,UACE,WACF,CACA,OAKE,UAAW,CADX,cAAe,CAFf,QAAS,CADT,iBAAkB,CAElB,OAAQ,CAGR,iCAA6B,CAA7B,yBACF,CACA,cAEE,kBAAmB,CADnB,YAEF,CACA,aAEE,aAAuB,CADvB,iBAEF,CACA,YAIE,qBAAsB,CAEtB,iBAAkB,CADlB,UAAW,CAJX,YAAa,CACb,qBAAsB,CACtB,iBAIF,CACA,kBAGE,kBAAmB,CAFnB,YAAa,CACb,6BAA8B,CAE9B,kBACF,CACA,YACE,cAAe,CACf,eACF,CACA,UAEE,YAAa,CADb,WAEF,CACA,cAIE,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,WAAY,CACZ,gBAAiB,CAGjB,WAAY,CALZ,UAOF,CACA,SACE,YAAa,CACb,mBAAoB,CACpB,eACF,CClLA,UAEE,UAAW,CACX,cAAe,CAFf,wBAGF,CAEA,gBACE,aACF,CCRA,SAIE,eAAgB,CAFhB,cAAe,CACf,cAAe,CAEf,aAAc,CACd,qBAAsB,CALtB,iBAAkB,CAMlB,8BACF,CAEA,SACE,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,kBAAmB,CAInB,UAAW,CALX,YAAa,CAEb,cAAe,CAEf,UAAW,CADX,QAGF,CAEA,MAGE,cAAe,CACf,gBAAiB,CAHjB,iBAAkB,CAClB,WAGF,CAEA,YACE,aACF,CAEA,OACE,wBACF,CAEA,SAEE,eAAgB,CADhB,iBAAkB,CAGlB,8BAAgC,CADhC,UAEF,CAEA,MAGE,eAAgB,CADhB,iCAA6B,CAA7B,yBAA6B,CAD7B,OAGF,CAEA,iBAGE,kBAAmB,CAFnB,YAAa,CACb,cAAe,CAEf,UACF,CAEA,gBAGE,kBAAmB,CAFnB,YAAa,CACb,6BAEF,CAEA,eAEE,eAAgB,CADhB,gBAEF,CAEA,UAIE,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,wBAAyB,CACzB,gBAAiB,CAGjB,WAAY,CALZ,UAOF,CAEA,QACE,0BACF,CAEA,MAEE,qBAAsB,CAGtB,kBAAmB,CAFnB,kBAAmB,CAGnB,eAAgB,CAFhB,YAAa,CAHb,iBAMF,CAEA,uBACE,mCAA4B,CAA5B,2BACF,CAEA,WAUE,wBAAyB,CADzB,kBAAmB,CAJnB,WAAY,CAGZ,gBAAiB,CAPjB,iBAAkB,CAClB,UAAW,CAKX,iBAAkB,CAJlB,oBAAqB,CAGrB,kCAA2B,CAA3B,0BAA2B,CAK3B,8BAAgC,CAPhC,UAQF,CAEA,iBAEE,wBAAyB,CADzB,cAEF,CAEA,YAEE,UAAW,CADX,wBAEF","sources":["scenes/launch_model/styles/modelCardStyle.css","components/copyComponent/style.css","scenes/register_model/styles/registerModelStyle.css"],"sourcesContent":[".container {\n display: block;\n position: relative;\n width: 300px;\n height: 300px;\n cursor: pointer;\n border-radius: 20px;\n}\n.descriptionCard {\n position: relative;\n top: -1px;\n left: -1px;\n width: 300px;\n height: 300px;\n padding: 20px;\n border-radius: 20px;\n}\n.cardTitle {\n display: flex;\n justify-content: space-between;\n}\n.iconButtonBox {\n display: flex;\n align-items: center;\n}\n.drawerCard {\n position: relative;\n padding: 20px 80px 0;\n min-height: 100%;\n width: 60vw;\n}\n.p {\n display: -webkit-box;\n -webkit-line-clamp: 4;\n -webkit-box-orient: vertical;\n overflow: hidden;\n text-overflow: ellipsis;\n word-break: break-word;\n font-size: 14px;\n padding: 0px 10px;\n}\n.formContainer {\n height: 80%;\n overflow: scroll;\n padding: 0 10px;\n}\n.buttonsContainer {\n position: absolute;\n bottom: 50px;\n left: 100px;\n right: 100px;\n display: flex;\n justify-content: space-between;\n align-items: center;\n}\n.buttonContainer {\n width: 45%;\n border-width: 0px;\n background-color: transparent;\n}\n.buttonItem {\n width: 100%;\n padding: 5px;\n border-radius: 4px;\n border: 1px solid #e5e7eb;\n border-width: 1px;\n border-color: #e5e7eb;\n}\n.instructionText {\n font-size: 12px;\n color: #666666;\n font-style: italic;\n margin: 30px 0;\n text-align: center;\n}\n.iconRow {\n position: absolute;\n bottom: 20px;\n left: 20px;\n right: 20px;\n display: flex;\n justify-content: space-between;\n align-items: center;\n}\n.iconItem {\n display: flex;\n flex-direction: column;\n align-items: center;\n margin: 20px;\n}\n.boldIconText {\n font-weight: bold;\n font-size: 1.2em;\n}\n.muiIcon {\n font-size: 1.5em;\n}\n.smallText {\n font-size: 0.8em;\n}\n.dialogBox {\n width: 1241px;\n height: 607px;\n background-color: #fff;\n margin: 32px;\n overflow-x: scroll;\n}\n.dialogTitle {\n display: flex;\n justify-content: space-between;\n padding: 20px 20px 7px;\n color: #000;\n}\n.dialogTitle-model_name {\n font-size: 18px;\n font-weight: 700;\n}\n.pathBox {\n width: 160px;\n cursor: pointer;\n overflow: hidden;\n white-space: nowrap;\n text-overflow: ellipsis;\n}\n.pathBox2 {\n width: 300px;\n}\n.empty {\n position: absolute;\n left: 50%;\n top: 30%;\n font-size: 20px;\n color: #555;\n transform: translate(-50%, 0);\n}\n.deleteDialog {\n display: flex;\n align-items: center;\n}\n.warningIcon {\n margin-right: 10px;\n color: rgb(237, 108, 2);\n}\n.jsonDialog {\n display: flex;\n flex-direction: column;\n padding: 10px 30px;\n background-color: #fff;\n color: #000;\n border-radius: 8px;\n}\n.jsonDialog-title {\n display: flex;\n justify-content: space-between;\n align-items: center;\n margin: 10px 0 20px 0;\n}\n.title-name {\n font-size: 16px;\n font-weight: 700;\n}\n.main-box {\n width: 700px;\n height: 500px;\n}\n.textarea-box {\n width: 100%;\n height: 100%;\n padding: 5px 10px;\n border: 1px solid #ddd;\n border-radius: 5px;\n resize: none;\n color: #444;\n}\n.but-box {\n display: flex;\n justify-content: end;\n margin-top: 20px;\n}\n",".copyText {\n font-size: 14px !important;\n color: #666;\n cursor: pointer;\n}\n\n.copyText:hover {\n color: #1976d2;\n}\n",".formBox {\n position: relative;\n max-width: 50vw;\n min-width: 50vw;\n max-height: 80vh;\n overflow: auto;\n padding: 40px 20px 0 0;\n transition: all 0.4s ease-in-out;\n}\n\n.broaden {\n max-width: 100%;\n min-width: 100%;\n padding-right: 0;\n}\n\n.show-json {\n display: flex;\n align-items: center;\n position: fixed;\n top: 90px;\n right: 60px;\n color: #444;\n}\n\n.icon {\n position: absolute;\n right: -40px;\n cursor: pointer;\n margin-left: 20px;\n}\n\n.icon:hover {\n color: #1976d2;\n}\n\n.arrow {\n font-size: 24px !important;\n}\n\n.jsonBox {\n position: relative;\n min-height: 80vh;\n width: 100%;\n transition: all 0.4s ease-in-out;\n}\n\n.hide {\n width: 0;\n transform: translate(30vw, 0);\n overflow: hidden;\n}\n\n.checkboxWrapper {\n display: flex;\n flex-wrap: wrap;\n align-items: center;\n width: 100%;\n}\n\n.jsonBox-header {\n display: flex;\n justify-content: space-between;\n align-items: center;\n}\n\n.jsonBox-title {\n line-height: 40px;\n font-weight: 700;\n}\n\n.textarea {\n width: 100%;\n height: calc(100% - 40px);\n padding: 5px 10px;\n border: 1px solid #ddd;\n border-radius: 5px;\n resize: none;\n color: #444;\n}\n\n.addBtn {\n margin-left: 20px !important;\n}\n\n.item {\n position: relative;\n background-color: #eee;\n margin: 10px 50px 0;\n padding: 20px;\n border-radius: 10px;\n overflow: hidden;\n}\n\n.item:hover .deleteBtn {\n transform: translateX(-50px);\n}\n\n.deleteBtn {\n position: absolute;\n right: 20px;\n top: calc(50% - 25px);\n width: 50px;\n height: 50px;\n transform: translateX(80px);\n text-align: center;\n line-height: 70px;\n border-radius: 25px;\n background-color: #1976d2;\n transition: all 0.3s ease-in-out;\n}\n\n.deleteBtn:hover {\n cursor: pointer;\n box-shadow: 0 0 10px #aaa;\n}\n\n.deleteIcon {\n font-size: 28px !important;\n color: #fff;\n}\n"],"names":[],"sourceRoot":""}