xinference 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +22 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +91 -6
- xinference/client/restful/restful_client.py +39 -0
- xinference/core/model.py +41 -13
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +26 -4
- xinference/model/audio/f5tts.py +195 -0
- xinference/model/audio/fish_speech.py +71 -35
- xinference/model/audio/model_spec.json +88 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/llm_family.json +479 -53
- xinference/model/llm/llm_family_modelscope.json +423 -17
- xinference/model/llm/mlx/core.py +230 -50
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/core.py +1 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +23 -1
- xinference/model/llm/vllm/core.py +89 -2
- xinference/thirdparty/f5_tts/__init__.py +0 -0
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/types.py +2 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.2f269bb3.js → main.4eb4ee80.js} +3 -3
- xinference/web/ui/build/static/js/main.4eb4ee80.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8c5eeb02f772d02cbe8b89c05428d0dd41a97866f75f7dc1c2164a67f5a1cf98.json +1 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/METADATA +39 -18
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/RECORD +92 -39
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.4eb4ee80.js.LICENSE.txt} +0 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/LICENSE +0 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# training script.
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from importlib.resources import files
|
|
5
|
+
|
|
6
|
+
import hydra
|
|
7
|
+
|
|
8
|
+
from f5_tts.model import CFM, DiT, Trainer, UNetT
|
|
9
|
+
from f5_tts.model.dataset import load_dataset
|
|
10
|
+
from f5_tts.model.utils import get_tokenizer
|
|
11
|
+
|
|
12
|
+
# Import-time side effect: the process working directory is switched to the
# path two levels above the f5_tts package location resolved by
# importlib.resources -- presumably so relative paths used during training
# resolve from the project root of an editable install (TODO confirm).
os.chdir(str(files("f5_tts").joinpath("../.."))) # change working directory to root of project (local editable)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@hydra.main(version_base="1.3", config_path=str(files("f5_tts").joinpath("configs")), config_name=None)
def main(cfg):
    """Build a CFM model and a Trainer from the Hydra config, then train.

    Args:
        cfg: Config object supplied by ``hydra.main``. This function reads
            the ``model``, ``datasets``, ``optim`` and ``ckpts`` groups.

    Raises:
        ValueError: If ``cfg.model.name`` contains neither ``"F5TTS"`` nor
            ``"E2TTS"``, so no backbone class can be selected.
    """
    tokenizer = cfg.model.tokenizer
    mel_spec_type = cfg.model.mel_spec.mel_spec_type
    exp_name = f"{cfg.model.name}_{mel_spec_type}_{cfg.model.tokenizer}_{cfg.datasets.name}"

    # set text tokenizer
    # For a "custom" tokenizer the path comes from the model config; otherwise
    # the dataset name is what gets passed to get_tokenizer.
    if tokenizer != "custom":
        tokenizer_path = cfg.datasets.name
    else:
        tokenizer_path = cfg.model.tokenizer_path
    vocab_char_map, vocab_size = get_tokenizer(tokenizer_path, tokenizer)

    # set model
    if "F5TTS" in cfg.model.name:
        model_cls = DiT
    elif "E2TTS" in cfg.model.name:
        model_cls = UNetT
    else:
        # Without this branch an unrecognised name would only surface later
        # as an UnboundLocalError on model_cls.
        raise ValueError(
            f"Unsupported model name {cfg.model.name!r}: expected it to contain 'F5TTS' or 'E2TTS'"
        )
    wandb_resume_id = None

    model = CFM(
        transformer=model_cls(**cfg.model.arch, text_num_embeds=vocab_size, mel_dim=cfg.model.mel_spec.n_mel_channels),
        mel_spec_kwargs=cfg.model.mel_spec,
        vocab_char_map=vocab_char_map,
    )

    # init trainer
    trainer = Trainer(
        model,
        epochs=cfg.optim.epochs,
        learning_rate=cfg.optim.learning_rate,
        num_warmup_updates=cfg.optim.num_warmup_updates,
        save_per_updates=cfg.ckpts.save_per_updates,
        # Checkpoints are written relative to two levels above the f5_tts package.
        checkpoint_path=str(files("f5_tts").joinpath(f"../../{cfg.ckpts.save_dir}")),
        batch_size=cfg.datasets.batch_size_per_gpu,
        batch_size_type=cfg.datasets.batch_size_type,
        max_samples=cfg.datasets.max_samples,
        grad_accumulation_steps=cfg.optim.grad_accumulation_steps,
        max_grad_norm=cfg.optim.max_grad_norm,
        logger=cfg.ckpts.logger,
        wandb_project="CFM-TTS",
        wandb_run_name=exp_name,
        wandb_resume_id=wandb_resume_id,
        last_per_steps=cfg.ckpts.last_per_steps,
        log_samples=True,
        bnb_optimizer=cfg.optim.bnb_optimizer,
        mel_spec_type=mel_spec_type,
        is_local_vocoder=cfg.model.vocoder.is_local,
        local_vocoder_path=cfg.model.vocoder.local_path,
    )

    train_dataset = load_dataset(cfg.datasets.name, tokenizer, mel_spec_kwargs=cfg.model.mel_spec)
    trainer.train(
        train_dataset,
        num_workers=cfg.datasets.num_workers,
        resumable_with_seed=666,  # seed for shuffling dataset
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
if __name__ == "__main__":
|
|
75
|
+
main()
|
xinference/types.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"files": {
|
|
3
3
|
"main.css": "./static/css/main.5061c4c3.css",
|
|
4
|
-
"main.js": "./static/js/main.2f269bb3.js",
|
|
4
|
+
"main.js": "./static/js/main.4eb4ee80.js",
|
|
5
5
|
"static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
|
|
6
6
|
"index.html": "./index.html",
|
|
7
7
|
"main.5061c4c3.css.map": "./static/css/main.5061c4c3.css.map",
|
|
8
|
-
"main.2f269bb3.js.map": "./static/js/main.2f269bb3.js.map"
|
|
8
|
+
"main.4eb4ee80.js.map": "./static/js/main.4eb4ee80.js.map"
|
|
9
9
|
},
|
|
10
10
|
"entrypoints": [
|
|
11
11
|
"static/css/main.5061c4c3.css",
|
|
12
|
-
"static/js/main.2f269bb3.js"
|
|
12
|
+
"static/js/main.4eb4ee80.js"
|
|
13
13
|
]
|
|
14
14
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.2f269bb3.js"></script><link href="./static/css/main.5061c4c3.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
|
1
|
+
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.4eb4ee80.js"></script><link href="./static/css/main.5061c4c3.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|