xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +51 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +209 -40
- xinference/client/restful/restful_client.py +7 -26
- xinference/conftest.py +1 -1
- xinference/constants.py +5 -0
- xinference/core/cache_tracker.py +1 -1
- xinference/core/chat_interface.py +8 -14
- xinference/core/event.py +1 -1
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +110 -31
- xinference/core/scheduler.py +37 -37
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +17 -10
- xinference/core/utils.py +80 -22
- xinference/core/worker.py +17 -16
- xinference/deploy/cmdline.py +8 -16
- xinference/deploy/local.py +1 -1
- xinference/deploy/supervisor.py +1 -1
- xinference/deploy/utils.py +1 -1
- xinference/deploy/worker.py +1 -1
- xinference/model/audio/cosyvoice.py +86 -41
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/embedding/core.py +52 -31
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +164 -19
- xinference/model/llm/__init__.py +29 -11
- xinference/model/llm/llama_cpp/core.py +16 -33
- xinference/model/llm/llm_family.json +1011 -1296
- xinference/model/llm/llm_family.py +34 -53
- xinference/model/llm/llm_family_csghub.json +18 -35
- xinference/model/llm/llm_family_modelscope.json +981 -1122
- xinference/model/llm/lmdeploy/core.py +56 -88
- xinference/model/llm/mlx/core.py +46 -69
- xinference/model/llm/sglang/core.py +36 -18
- xinference/model/llm/transformers/chatglm.py +168 -306
- xinference/model/llm/transformers/cogvlm2.py +36 -63
- xinference/model/llm/transformers/cogvlm2_video.py +33 -223
- xinference/model/llm/transformers/core.py +55 -50
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/deepseek_vl.py +53 -96
- xinference/model/llm/transformers/glm4v.py +55 -111
- xinference/model/llm/transformers/intern_vl.py +39 -70
- xinference/model/llm/transformers/internlm2.py +32 -54
- xinference/model/llm/transformers/minicpmv25.py +22 -55
- xinference/model/llm/transformers/minicpmv26.py +158 -68
- xinference/model/llm/transformers/omnilmm.py +5 -28
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +234 -0
- xinference/model/llm/transformers/qwen_vl.py +34 -86
- xinference/model/llm/transformers/utils.py +32 -38
- xinference/model/llm/transformers/yi_vl.py +32 -72
- xinference/model/llm/utils.py +280 -554
- xinference/model/llm/vllm/core.py +161 -100
- xinference/model/rerank/core.py +41 -8
- xinference/model/rerank/model_spec.json +7 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -1
- xinference/model/utils.py +1 -31
- xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
- xinference/thirdparty/cosyvoice/cli/model.py +139 -26
- xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
- xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
- xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
- xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
- xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
- xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
- xinference/thirdparty/cosyvoice/utils/common.py +36 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
- xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
- xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
- xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +33 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/thirdparty/matcha/VERSION +1 -0
- xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
- xinference/thirdparty/matcha/hifigan/README.md +101 -0
- xinference/thirdparty/omnilmm/LICENSE +201 -0
- xinference/thirdparty/whisper/__init__.py +156 -0
- xinference/thirdparty/whisper/__main__.py +3 -0
- xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
- xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
- xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
- xinference/thirdparty/whisper/audio.py +157 -0
- xinference/thirdparty/whisper/decoding.py +826 -0
- xinference/thirdparty/whisper/model.py +314 -0
- xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
- xinference/thirdparty/whisper/normalizers/basic.py +76 -0
- xinference/thirdparty/whisper/normalizers/english.json +1741 -0
- xinference/thirdparty/whisper/normalizers/english.py +550 -0
- xinference/thirdparty/whisper/timing.py +386 -0
- xinference/thirdparty/whisper/tokenizer.py +395 -0
- xinference/thirdparty/whisper/transcribe.py +605 -0
- xinference/thirdparty/whisper/triton_ops.py +109 -0
- xinference/thirdparty/whisper/utils.py +316 -0
- xinference/thirdparty/whisper/version.py +1 -0
- xinference/types.py +14 -53
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.4bafd904.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/main.754740c0.js +3 -0
- xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +2 -0
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +37 -0
- xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
- xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
- xinference/web/ui/node_modules/nunjucks/package.json +112 -0
- xinference/web/ui/package-lock.json +38 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/METADATA +16 -10
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/RECORD +179 -127
- xinference/model/llm/transformers/llama_2.py +0 -108
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
- xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
- xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Base configuration for training a model
|
|
2
|
+
paths:
|
|
3
|
+
run_dir: results/${project}
|
|
4
|
+
ckpt_dir: ${paths.run_dir}/checkpoints
|
|
5
|
+
|
|
6
|
+
hydra:
|
|
7
|
+
run:
|
|
8
|
+
dir: ${paths.run_dir}
|
|
9
|
+
|
|
10
|
+
# Lightning Trainer
|
|
11
|
+
trainer:
|
|
12
|
+
_target_: lightning.pytorch.trainer.Trainer
|
|
13
|
+
|
|
14
|
+
default_root_dir: ${paths.run_dir}
|
|
15
|
+
accelerator: gpu
|
|
16
|
+
num_nodes: 1
|
|
17
|
+
devices: auto
|
|
18
|
+
strategy:
|
|
19
|
+
_target_: lightning.pytorch.strategies.DDPStrategy
|
|
20
|
+
process_group_backend: nccl # This should be overridden when training on Windows
|
|
21
|
+
|
|
22
|
+
precision: bf16-mixed
|
|
23
|
+
|
|
24
|
+
# disable validation by epoch end
|
|
25
|
+
check_val_every_n_epoch: null
|
|
26
|
+
val_check_interval: 5000
|
|
27
|
+
max_steps: 100_000
|
|
28
|
+
|
|
29
|
+
# Use torch.backends.cudnn.benchmark to speed up training
|
|
30
|
+
benchmark: true
|
|
31
|
+
|
|
32
|
+
# Callbacks
|
|
33
|
+
callbacks:
|
|
34
|
+
model_checkpoint:
|
|
35
|
+
_target_: lightning.pytorch.callbacks.ModelCheckpoint
|
|
36
|
+
dirpath: ${paths.ckpt_dir}
|
|
37
|
+
filename: "step_{step:09d}"
|
|
38
|
+
save_last: false # additionally always save an exact copy of the last checkpoint to a file last.ckpt
|
|
39
|
+
save_top_k: 5 # save 5 latest checkpoints
|
|
40
|
+
monitor: step # use step to monitor checkpoints
|
|
41
|
+
mode: max # save the latest checkpoint with the highest global_step
|
|
42
|
+
every_n_epochs: null # don't save checkpoints by epoch end
|
|
43
|
+
every_n_train_steps: 5000 # save checkpoints every 5000 steps
|
|
44
|
+
auto_insert_metric_name: false
|
|
45
|
+
|
|
46
|
+
model_summary:
|
|
47
|
+
_target_: lightning.pytorch.callbacks.ModelSummary
|
|
48
|
+
max_depth: 2 # the maximum depth of layer nesting that the summary will include
|
|
49
|
+
|
|
50
|
+
learning_rate_monitor:
|
|
51
|
+
_target_: lightning.pytorch.callbacks.LearningRateMonitor
|
|
52
|
+
logging_interval: step
|
|
53
|
+
log_momentum: false
|
|
54
|
+
|
|
55
|
+
grad_norm_monitor:
|
|
56
|
+
_target_: fish_speech.callbacks.GradNormMonitor
|
|
57
|
+
norm_type: 2
|
|
58
|
+
logging_interval: step
|
|
59
|
+
|
|
60
|
+
# Logger
|
|
61
|
+
logger:
|
|
62
|
+
tensorboard:
|
|
63
|
+
_target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
|
|
64
|
+
save_dir: "${paths.run_dir}/tensorboard/"
|
|
65
|
+
name: null
|
|
66
|
+
log_graph: false
|
|
67
|
+
default_hp_metric: true
|
|
68
|
+
prefix: ""
|
|
69
|
+
|
|
70
|
+
# wandb:
|
|
71
|
+
# _target_: lightning.pytorch.loggers.wandb.WandbLogger
|
|
72
|
+
# # name: "" # name of the run (normally generated by wandb)
|
|
73
|
+
# save_dir: "${paths.run_dir}"
|
|
74
|
+
# offline: False
|
|
75
|
+
# id: null # pass correct id to resume experiment!
|
|
76
|
+
# anonymous: null # enable anonymous logging
|
|
77
|
+
# project: "fish-speech"
|
|
78
|
+
# log_model: False # upload lightning ckpts
|
|
79
|
+
# prefix: "" # a string to put at the beginning of metric keys
|
|
80
|
+
# # entity: "" # set to name of your wandb team
|
|
81
|
+
# group: ""
|
|
82
|
+
# tags: ["vq", "hq", "finetune"]
|
|
83
|
+
# job_type: ""
|
|
84
|
+
|
|
85
|
+
# Loop
|
|
86
|
+
train: true
|
|
87
|
+
test: false
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
_target_: fish_speech.models.vqgan.modules.firefly.FireflyArchitecture
|
|
2
|
+
spec_transform:
|
|
3
|
+
_target_: fish_speech.utils.spectrogram.LogMelSpectrogram
|
|
4
|
+
sample_rate: 44100
|
|
5
|
+
n_mels: 160
|
|
6
|
+
n_fft: 2048
|
|
7
|
+
hop_length: 512
|
|
8
|
+
win_length: 2048
|
|
9
|
+
backbone:
|
|
10
|
+
_target_: fish_speech.models.vqgan.modules.firefly.ConvNeXtEncoder
|
|
11
|
+
input_channels: 160
|
|
12
|
+
depths: [3, 3, 9, 3]
|
|
13
|
+
dims: [128, 256, 384, 512]
|
|
14
|
+
drop_path_rate: 0.2
|
|
15
|
+
kernel_size: 7
|
|
16
|
+
head:
|
|
17
|
+
_target_: fish_speech.models.vqgan.modules.firefly.HiFiGANGenerator
|
|
18
|
+
hop_length: 512
|
|
19
|
+
upsample_rates: [8, 8, 2, 2, 2] # aka. strides
|
|
20
|
+
upsample_kernel_sizes: [16, 16, 4, 4, 4]
|
|
21
|
+
resblock_kernel_sizes: [3, 7, 11]
|
|
22
|
+
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
|
|
23
|
+
num_mels: 512
|
|
24
|
+
upsample_initial_channel: 512
|
|
25
|
+
pre_conv_kernel_size: 13
|
|
26
|
+
post_conv_kernel_size: 13
|
|
27
|
+
quantizer:
|
|
28
|
+
_target_: fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize
|
|
29
|
+
input_dim: 512
|
|
30
|
+
n_groups: 8
|
|
31
|
+
n_codebooks: 1
|
|
32
|
+
levels: [8, 5, 5, 5]
|
|
33
|
+
downsample_factor: [2, 2]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- base
|
|
3
|
+
- _self_
|
|
4
|
+
|
|
5
|
+
project: text2semantic_finetune_dual_ar
|
|
6
|
+
max_length: 4096
|
|
7
|
+
pretrained_ckpt_path: checkpoints/fish-speech-1.4
|
|
8
|
+
|
|
9
|
+
# Lightning Trainer
|
|
10
|
+
trainer:
|
|
11
|
+
accumulate_grad_batches: 1
|
|
12
|
+
gradient_clip_val: 1.0
|
|
13
|
+
gradient_clip_algorithm: "norm"
|
|
14
|
+
max_steps: 1000
|
|
15
|
+
precision: bf16-true
|
|
16
|
+
limit_val_batches: 10
|
|
17
|
+
val_check_interval: 100
|
|
18
|
+
|
|
19
|
+
# Tokenizer Configuration
|
|
20
|
+
tokenizer:
|
|
21
|
+
_target_: transformers.AutoTokenizer.from_pretrained
|
|
22
|
+
pretrained_model_name_or_path: ${pretrained_ckpt_path}
|
|
23
|
+
|
|
24
|
+
# Dataset Configuration
|
|
25
|
+
train_dataset:
|
|
26
|
+
_target_: fish_speech.datasets.semantic.AutoTextSemanticInstructionDataset
|
|
27
|
+
proto_files:
|
|
28
|
+
- data/protos
|
|
29
|
+
tokenizer: ${tokenizer}
|
|
30
|
+
causal: true
|
|
31
|
+
max_length: ${max_length}
|
|
32
|
+
use_speaker: false
|
|
33
|
+
interactive_prob: 0.7
|
|
34
|
+
|
|
35
|
+
val_dataset:
|
|
36
|
+
_target_: fish_speech.datasets.semantic.AutoTextSemanticInstructionDataset
|
|
37
|
+
proto_files:
|
|
38
|
+
- data/protos
|
|
39
|
+
tokenizer: ${tokenizer}
|
|
40
|
+
causal: true
|
|
41
|
+
max_length: ${max_length}
|
|
42
|
+
use_speaker: false
|
|
43
|
+
interactive_prob: 0.7
|
|
44
|
+
|
|
45
|
+
data:
|
|
46
|
+
_target_: fish_speech.datasets.semantic.SemanticDataModule
|
|
47
|
+
train_dataset: ${train_dataset}
|
|
48
|
+
val_dataset: ${val_dataset}
|
|
49
|
+
num_workers: 4
|
|
50
|
+
batch_size: 8
|
|
51
|
+
tokenizer: ${tokenizer}
|
|
52
|
+
max_length: ${max_length}
|
|
53
|
+
|
|
54
|
+
# Model Configuration
|
|
55
|
+
model:
|
|
56
|
+
_target_: fish_speech.models.text2semantic.lit_module.TextToSemantic
|
|
57
|
+
model:
|
|
58
|
+
_target_: fish_speech.models.text2semantic.llama.BaseTransformer.from_pretrained
|
|
59
|
+
path: ${pretrained_ckpt_path}
|
|
60
|
+
load_weights: true
|
|
61
|
+
max_length: ${max_length}
|
|
62
|
+
lora_config: null
|
|
63
|
+
|
|
64
|
+
optimizer:
|
|
65
|
+
_target_: torch.optim.AdamW
|
|
66
|
+
_partial_: true
|
|
67
|
+
lr: 1e-4
|
|
68
|
+
weight_decay: 0
|
|
69
|
+
betas: [0.9, 0.95]
|
|
70
|
+
eps: 1e-5
|
|
71
|
+
|
|
72
|
+
lr_scheduler:
|
|
73
|
+
_target_: torch.optim.lr_scheduler.LambdaLR
|
|
74
|
+
_partial_: true
|
|
75
|
+
lr_lambda:
|
|
76
|
+
_target_: fish_speech.scheduler.get_constant_schedule_with_warmup_lr_lambda
|
|
77
|
+
_partial_: true
|
|
78
|
+
num_warmup_steps: 10
|
|
79
|
+
|
|
80
|
+
# Callbacks
|
|
81
|
+
callbacks:
|
|
82
|
+
model_checkpoint:
|
|
83
|
+
every_n_train_steps: ${trainer.val_check_interval}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
syntax = "proto3";
|
|
2
|
+
|
|
3
|
+
package text_data;
|
|
4
|
+
|
|
5
|
+
message Semantics {
|
|
6
|
+
repeated uint32 values = 1;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
message Sentence {
|
|
10
|
+
repeated string texts = 1;
|
|
11
|
+
repeated Semantics semantics = 3;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
message TextData {
|
|
15
|
+
string source = 1;
|
|
16
|
+
string name = 2;
|
|
17
|
+
repeated Sentence sentences = 4;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
message SampledData {
|
|
21
|
+
string source = 1;
|
|
22
|
+
string name = 2;
|
|
23
|
+
repeated Sentence samples = 3;
|
|
24
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
## i18n Folder Attribution
|
|
2
|
+
|
|
3
|
+
The `i18n` folder within the `fish_speech` directory contains files initially sourced from the RVC project. In compliance with the MIT license under which these files were released, we acknowledge the original authors and sources below:
|
|
4
|
+
|
|
5
|
+
### fish_speech/i18n/core.py
|
|
6
|
+
|
|
7
|
+
**Related code from RVC:**
|
|
8
|
+
[https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/i18n.py](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/i18n.py)
|
|
9
|
+
|
|
10
|
+
**Initial commit:**
|
|
11
|
+
add localization(添加本地化) [RVC-Project/Retrieval-based-Voice-Conversion-WebUI#35](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/pull/35)
|
|
12
|
+
|
|
13
|
+
**Initial author:**
|
|
14
|
+
[@L4Ph](https://github.com/L4Ph)
|
|
15
|
+
|
|
16
|
+
### fish_speech/i18n/scan.py
|
|
17
|
+
|
|
18
|
+
**Related code from RVC:**
|
|
19
|
+
[https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/scan_i18n.py](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/scan_i18n.py)
|
|
20
|
+
|
|
21
|
+
**Initial commit:**
|
|
22
|
+
File for detecting i18n missing keys [RVC-Project/Retrieval-based-Voice-Conversion-WebUI#1058](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/pull/1058)
|
|
23
|
+
|
|
24
|
+
**Initial author:**
|
|
25
|
+
[@towzeur](https://github.com/towzeur)
|
|
26
|
+
|
|
27
|
+
We appreciate the contributions of the RVC project and its authors.
|
|
@@ -72,7 +72,7 @@
|
|
|
72
72
|
"Put your text here.": "Put your text here.",
|
|
73
73
|
"Reference Audio": "Reference Audio",
|
|
74
74
|
"Reference Text": "Reference Text",
|
|
75
|
-
"Related code
|
|
75
|
+
"Related code and weights are released under CC BY-NC-SA 4.0 License.": "Related code and weights are released under CC BY-NC-SA 4.0 License.",
|
|
76
76
|
"Remove Selected Data": "Remove Selected Data",
|
|
77
77
|
"Removed path successfully!": "Removed path successfully!",
|
|
78
78
|
"Repetition Penalty": "Repetition Penalty",
|
|
@@ -72,7 +72,7 @@
|
|
|
72
72
|
"Put your text here.": "Ponga su texto aquí.",
|
|
73
73
|
"Reference Audio": "Audio de Referencia",
|
|
74
74
|
"Reference Text": "Texto de Referencia",
|
|
75
|
-
"Related code
|
|
75
|
+
"Related code and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
|
|
76
76
|
"Remove Selected Data": "Eliminar Datos Seleccionados",
|
|
77
77
|
"Removed path successfully!": "¡Ruta eliminada exitosamente!",
|
|
78
78
|
"Repetition Penalty": "Penalización por Repetición",
|
|
@@ -72,7 +72,7 @@
|
|
|
72
72
|
"Put your text here.": "ここにテキストを入力してください。",
|
|
73
73
|
"Reference Audio": "リファレンスオーディオ",
|
|
74
74
|
"Reference Text": "リファレンステキスト",
|
|
75
|
-
"Related code
|
|
75
|
+
"Related code and weights are released under CC BY-NC-SA 4.0 License.": "関連コードと重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
|
|
76
76
|
"Remove Selected Data": "選択したデータを削除",
|
|
77
77
|
"Removed path successfully!": "パスの削除に成功しました!",
|
|
78
78
|
"Repetition Penalty": "反復ペナルティ",
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"Reference Text": "Texto de Referência",
|
|
85
85
|
"warning": "Aviso",
|
|
86
86
|
"Pre-processing begins...": "O pré-processamento começou!",
|
|
87
|
-
"Related code
|
|
87
|
+
"Related code and weights are released under CC BY-NC-SA 4.0 License.": "O código relacionado e os pesos são licenciados sob a Licença CC BY-NC-SA 4.0.",
|
|
88
88
|
"Remove Selected Data": "Remover Dados Selecionados",
|
|
89
89
|
"Removed path successfully!": "Caminho removido com sucesso!",
|
|
90
90
|
"Repetition Penalty": "Penalidade de Repetição",
|
|
@@ -72,7 +72,7 @@
|
|
|
72
72
|
"Put your text here.": "在此处输入文本.",
|
|
73
73
|
"Reference Audio": "参考音频",
|
|
74
74
|
"Reference Text": "参考文本",
|
|
75
|
-
"Related code
|
|
75
|
+
"Related code and weights are released under CC BY-NC-SA 4.0 License.": "相关代码和权重使用 CC BY-NC-SA 4.0 许可证发布.",
|
|
76
76
|
"Remove Selected Data": "移除选中数据",
|
|
77
77
|
"Removed path successfully!": "移除路径成功!",
|
|
78
78
|
"Repetition Penalty": "重复惩罚",
|
|
@@ -353,7 +353,7 @@ class BaseTransformer(nn.Module):
|
|
|
353
353
|
|
|
354
354
|
if "int8" in str(Path(path)):
|
|
355
355
|
logger.info("Using int8 weight-only quantization!")
|
|
356
|
-
from
|
|
356
|
+
from tools.llama.quantize import WeightOnlyInt8QuantHandler
|
|
357
357
|
|
|
358
358
|
simple_quantizer = WeightOnlyInt8QuantHandler(model)
|
|
359
359
|
model = simple_quantizer.convert_for_runtime()
|
|
@@ -363,7 +363,7 @@ class BaseTransformer(nn.Module):
|
|
|
363
363
|
path_comps = path.name.split("-")
|
|
364
364
|
assert path_comps[-2].startswith("g")
|
|
365
365
|
groupsize = int(path_comps[-2][1:])
|
|
366
|
-
from
|
|
366
|
+
from tools.llama.quantize import WeightOnlyInt4QuantHandler
|
|
367
367
|
|
|
368
368
|
simple_quantizer = WeightOnlyInt4QuantHandler(model, groupsize)
|
|
369
369
|
model = simple_quantizer.convert_for_runtime()
|