sopro-1.0.0-py3-none-any.whl → sopro-1.0.2-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sopro/__init__.py +1 -1
- sopro/cli.py +70 -69
- sopro/model.py +1 -1
- sopro/streaming.py +1 -1
- {sopro-1.0.0.dist-info → sopro-1.0.2.dist-info}/METADATA +2 -2
- {sopro-1.0.0.dist-info → sopro-1.0.2.dist-info}/RECORD +10 -10
- {sopro-1.0.0.dist-info → sopro-1.0.2.dist-info}/WHEEL +0 -0
- {sopro-1.0.0.dist-info → sopro-1.0.2.dist-info}/entry_points.txt +0 -0
- {sopro-1.0.0.dist-info → sopro-1.0.2.dist-info}/licenses/LICENSE.txt +0 -0
- {sopro-1.0.0.dist-info → sopro-1.0.2.dist-info}/top_level.txt +0 -0
sopro/__init__.py
CHANGED
sopro/cli.py
CHANGED
|
@@ -97,79 +97,80 @@ def main() -> None:
|
|
|
97
97
|
arr = np.load(args.ref_tokens)
|
|
98
98
|
ref_tokens_tq = torch.from_numpy(arr).long()
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
100
|
+
with torch.inference_mode():
|
|
101
|
+
text_ids = tts.encode_text(args.text)
|
|
102
|
+
ref = tts.encode_reference(
|
|
103
|
+
ref_audio_path=args.ref_audio,
|
|
104
|
+
ref_tokens_tq=ref_tokens_tq,
|
|
105
|
+
ref_seconds=args.ref_seconds,
|
|
106
|
+
)
|
|
106
107
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
108
|
+
prep = tts.model.prepare_conditioning(
|
|
109
|
+
text_ids,
|
|
110
|
+
ref,
|
|
111
|
+
max_frames=args.max_frames,
|
|
112
|
+
device=tts.device,
|
|
113
|
+
style_strength=float(
|
|
114
|
+
args.style_strength
|
|
115
|
+
if args.style_strength is not None
|
|
116
|
+
else cfg.style_strength
|
|
117
|
+
),
|
|
118
|
+
)
|
|
118
119
|
|
|
119
|
-
|
|
120
|
+
t_start = time.perf_counter()
|
|
120
121
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
122
|
+
hist_A: list[int] = []
|
|
123
|
+
pbar = tqdm(
|
|
124
|
+
total=args.max_frames,
|
|
125
|
+
desc="AR sampling",
|
|
126
|
+
unit="frame",
|
|
127
|
+
disable=args.quiet,
|
|
128
|
+
)
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
130
|
+
for _t, rvq1, p_stop in tts.model.ar_stream(
|
|
131
|
+
prep,
|
|
132
|
+
max_frames=args.max_frames,
|
|
133
|
+
top_p=args.top_p,
|
|
134
|
+
temperature=args.temperature,
|
|
135
|
+
anti_loop=(not args.no_anti_loop),
|
|
136
|
+
use_prefix=(not args.no_prefix),
|
|
137
|
+
prefix_sec_fixed=args.prefix_sec,
|
|
138
|
+
use_stop_head=(False if args.no_stop_head else None),
|
|
139
|
+
stop_patience=args.stop_patience,
|
|
140
|
+
stop_threshold=args.stop_threshold,
|
|
141
|
+
):
|
|
142
|
+
hist_A.append(int(rvq1))
|
|
143
|
+
pbar.update(1)
|
|
144
|
+
if p_stop is None:
|
|
145
|
+
pbar.set_postfix(p_stop="off")
|
|
146
|
+
else:
|
|
147
|
+
pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
|
|
148
|
+
|
|
149
|
+
pbar.n = len(hist_A)
|
|
150
|
+
pbar.close()
|
|
151
|
+
|
|
152
|
+
t_after_sampling = time.perf_counter()
|
|
153
|
+
|
|
154
|
+
T = len(hist_A)
|
|
155
|
+
if T == 0:
|
|
156
|
+
save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
|
|
157
|
+
t_end = time.perf_counter()
|
|
158
|
+
if not args.quiet:
|
|
159
|
+
print(
|
|
160
|
+
f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
|
|
161
|
+
f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
|
|
162
|
+
f"total={t_end - t_start:.2f}s"
|
|
163
|
+
)
|
|
164
|
+
print(f"[Done] Wrote {args.out}")
|
|
165
|
+
return
|
|
166
|
+
|
|
167
|
+
tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
|
|
168
|
+
cond_seq = prep["cond_all"][:, :T, :]
|
|
169
|
+
tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
|
|
170
|
+
tokens_tq = tokens_1xTQ.squeeze(0)
|
|
171
|
+
|
|
172
|
+
wav = tts.codec.decode_full(tokens_tq)
|
|
173
|
+
save_audio(args.out, wav, sr=TARGET_SR)
|
|
173
174
|
|
|
174
175
|
t_end = time.perf_counter()
|
|
175
176
|
if not args.quiet:
|
sopro/model.py
CHANGED
sopro/streaming.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sopro
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: A lightweight text-to-speech model with zero-shot voice cloning.
|
|
5
5
|
Author-email: Samuel Vitorino <samvitorino@gmail.com>
|
|
6
6
|
License: Apache 2.0
|
|
@@ -53,7 +53,7 @@ conda activate soprotts
|
|
|
53
53
|
### From PyPI
|
|
54
54
|
|
|
55
55
|
```bash
|
|
56
|
-
pip install sopro
|
|
56
|
+
pip install sopro
|
|
57
57
|
```
|
|
58
58
|
|
|
59
59
|
### From the repo
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
sopro/__init__.py,sha256=
|
|
1
|
+
sopro/__init__.py,sha256=SreucP3h4V4KsUU8PskOofqKEAmL8RvXYA6Ma53zb8Y,110
|
|
2
2
|
sopro/audio.py,sha256=xlp6aYzzGlOMcNZ-p9lDeeU0TUkSHMcvmLantwg_4-0,4162
|
|
3
|
-
sopro/cli.py,sha256=
|
|
3
|
+
sopro/cli.py,sha256=v4mZ_zfYt6BusVFMIHo3ae32xLw2_O_v5SDkfmt_Cvc,6040
|
|
4
4
|
sopro/config.py,sha256=OBD-k2z5GUdjFS545MyBXx-dAGhwnhRG11LW-zQt1-g,1063
|
|
5
5
|
sopro/constants.py,sha256=wSjFKeFIcLCxyVUVb3njxMK666IuxjlNzVT4_jfPovQ,97
|
|
6
6
|
sopro/hub.py,sha256=xsHfeO8X7v__FELvaQxWHYG8P39ygrgbluPs5GQjoCM,1391
|
|
7
|
-
sopro/model.py,sha256=
|
|
7
|
+
sopro/model.py,sha256=Rj10OPdx8UEhH2reU2SQ4oTztNJCOvLS7pm84V0E_xo,28631
|
|
8
8
|
sopro/sampling.py,sha256=Q5rbuef_BIuy12cv5J7v6k9ob3zQ0OFJIlMHssOkiuU,2951
|
|
9
|
-
sopro/streaming.py,sha256=
|
|
9
|
+
sopro/streaming.py,sha256=AgPzaxdimeDT-8potXAMXuRi7zrWUGgxViwHJw2R2Lk,4998
|
|
10
10
|
sopro/tokenizer.py,sha256=ucb86Jr-EaAyD9OHDoCmwB9Nh9AFIZK_TlZmMkv46KQ,1325
|
|
11
11
|
sopro/codec/__init__.py,sha256=6D6Q0M-SUZZnq79OT1nATenEc8zIZDrhZBpm7zdPEE4,129
|
|
12
12
|
sopro/codec/mimi.py,sha256=RNKnXfhWXUqHiU27C90wj18Rb3R2IZHpm5_cS_XAs9Y,5798
|
|
@@ -15,9 +15,9 @@ sopro/nn/blocks.py,sha256=zDEVUH2LXapXuQ4DyhplNh1I0iJYrNUL20IxHoz8ucs,3221
|
|
|
15
15
|
sopro/nn/embeddings.py,sha256=7YfYKj1v1oafTV4-iucJG4fmeT43fP_rQiJ6ACRKPNI,3185
|
|
16
16
|
sopro/nn/speaker.py,sha256=L2bs-bPlyxoWZyMTctBBuMTaEWm6FP7K1udrXehnTGM,2964
|
|
17
17
|
sopro/nn/xattn.py,sha256=OeRo1HbRZs0AkQ6AV6Q8cqYZP9K4vI-IwT3uVn9jOqg,2939
|
|
18
|
-
sopro-1.0.
|
|
19
|
-
sopro-1.0.
|
|
20
|
-
sopro-1.0.
|
|
21
|
-
sopro-1.0.
|
|
22
|
-
sopro-1.0.
|
|
23
|
-
sopro-1.0.
|
|
18
|
+
sopro-1.0.2.dist-info/licenses/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
19
|
+
sopro-1.0.2.dist-info/METADATA,sha256=LPMr5tnwQx3Rq5FX9CCMq6s4IvreA-EWQvt-OzQkm7g,6470
|
|
20
|
+
sopro-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
21
|
+
sopro-1.0.2.dist-info/entry_points.txt,sha256=OWcKgC5Syk8rzOhNzTZ3QR5GJEG88UfiShkovrwb2cI,44
|
|
22
|
+
sopro-1.0.2.dist-info/top_level.txt,sha256=Tik26_lEwzSKDuwQdqwoqA_O0b7CDATzousa0Q17PBo,6
|
|
23
|
+
sopro-1.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|