sopro 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sopro/__init__.py +1 -1
- sopro/cli.py +70 -69
- sopro/model.py +1 -1
- sopro/streaming.py +1 -1
- {sopro-1.0.1.dist-info → sopro-1.0.2.dist-info}/METADATA +1 -1
- {sopro-1.0.1.dist-info → sopro-1.0.2.dist-info}/RECORD +10 -10
- {sopro-1.0.1.dist-info → sopro-1.0.2.dist-info}/WHEEL +0 -0
- {sopro-1.0.1.dist-info → sopro-1.0.2.dist-info}/entry_points.txt +0 -0
- {sopro-1.0.1.dist-info → sopro-1.0.2.dist-info}/licenses/LICENSE.txt +0 -0
- {sopro-1.0.1.dist-info → sopro-1.0.2.dist-info}/top_level.txt +0 -0
sopro/__init__.py
CHANGED
sopro/cli.py
CHANGED
|
@@ -97,79 +97,80 @@ def main() -> None:
|
|
|
97
97
|
arr = np.load(args.ref_tokens)
|
|
98
98
|
ref_tokens_tq = torch.from_numpy(arr).long()
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
100
|
+
with torch.inference_mode():
|
|
101
|
+
text_ids = tts.encode_text(args.text)
|
|
102
|
+
ref = tts.encode_reference(
|
|
103
|
+
ref_audio_path=args.ref_audio,
|
|
104
|
+
ref_tokens_tq=ref_tokens_tq,
|
|
105
|
+
ref_seconds=args.ref_seconds,
|
|
106
|
+
)
|
|
106
107
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
108
|
+
prep = tts.model.prepare_conditioning(
|
|
109
|
+
text_ids,
|
|
110
|
+
ref,
|
|
111
|
+
max_frames=args.max_frames,
|
|
112
|
+
device=tts.device,
|
|
113
|
+
style_strength=float(
|
|
114
|
+
args.style_strength
|
|
115
|
+
if args.style_strength is not None
|
|
116
|
+
else cfg.style_strength
|
|
117
|
+
),
|
|
118
|
+
)
|
|
118
119
|
|
|
119
|
-
|
|
120
|
+
t_start = time.perf_counter()
|
|
120
121
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
122
|
+
hist_A: list[int] = []
|
|
123
|
+
pbar = tqdm(
|
|
124
|
+
total=args.max_frames,
|
|
125
|
+
desc="AR sampling",
|
|
126
|
+
unit="frame",
|
|
127
|
+
disable=args.quiet,
|
|
128
|
+
)
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
130
|
+
for _t, rvq1, p_stop in tts.model.ar_stream(
|
|
131
|
+
prep,
|
|
132
|
+
max_frames=args.max_frames,
|
|
133
|
+
top_p=args.top_p,
|
|
134
|
+
temperature=args.temperature,
|
|
135
|
+
anti_loop=(not args.no_anti_loop),
|
|
136
|
+
use_prefix=(not args.no_prefix),
|
|
137
|
+
prefix_sec_fixed=args.prefix_sec,
|
|
138
|
+
use_stop_head=(False if args.no_stop_head else None),
|
|
139
|
+
stop_patience=args.stop_patience,
|
|
140
|
+
stop_threshold=args.stop_threshold,
|
|
141
|
+
):
|
|
142
|
+
hist_A.append(int(rvq1))
|
|
143
|
+
pbar.update(1)
|
|
144
|
+
if p_stop is None:
|
|
145
|
+
pbar.set_postfix(p_stop="off")
|
|
146
|
+
else:
|
|
147
|
+
pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
|
|
148
|
+
|
|
149
|
+
pbar.n = len(hist_A)
|
|
150
|
+
pbar.close()
|
|
151
|
+
|
|
152
|
+
t_after_sampling = time.perf_counter()
|
|
153
|
+
|
|
154
|
+
T = len(hist_A)
|
|
155
|
+
if T == 0:
|
|
156
|
+
save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
|
|
157
|
+
t_end = time.perf_counter()
|
|
158
|
+
if not args.quiet:
|
|
159
|
+
print(
|
|
160
|
+
f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
|
|
161
|
+
f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
|
|
162
|
+
f"total={t_end - t_start:.2f}s"
|
|
163
|
+
)
|
|
164
|
+
print(f"[Done] Wrote {args.out}")
|
|
165
|
+
return
|
|
166
|
+
|
|
167
|
+
tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
|
|
168
|
+
cond_seq = prep["cond_all"][:, :T, :]
|
|
169
|
+
tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
|
|
170
|
+
tokens_tq = tokens_1xTQ.squeeze(0)
|
|
171
|
+
|
|
172
|
+
wav = tts.codec.decode_full(tokens_tq)
|
|
173
|
+
save_audio(args.out, wav, sr=TARGET_SR)
|
|
173
174
|
|
|
174
175
|
t_end = time.perf_counter()
|
|
175
176
|
if not args.quiet:
|
sopro/model.py
CHANGED
sopro/streaming.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
sopro/__init__.py,sha256=
|
|
1
|
+
sopro/__init__.py,sha256=SreucP3h4V4KsUU8PskOofqKEAmL8RvXYA6Ma53zb8Y,110
|
|
2
2
|
sopro/audio.py,sha256=xlp6aYzzGlOMcNZ-p9lDeeU0TUkSHMcvmLantwg_4-0,4162
|
|
3
|
-
sopro/cli.py,sha256=
|
|
3
|
+
sopro/cli.py,sha256=v4mZ_zfYt6BusVFMIHo3ae32xLw2_O_v5SDkfmt_Cvc,6040
|
|
4
4
|
sopro/config.py,sha256=OBD-k2z5GUdjFS545MyBXx-dAGhwnhRG11LW-zQt1-g,1063
|
|
5
5
|
sopro/constants.py,sha256=wSjFKeFIcLCxyVUVb3njxMK666IuxjlNzVT4_jfPovQ,97
|
|
6
6
|
sopro/hub.py,sha256=xsHfeO8X7v__FELvaQxWHYG8P39ygrgbluPs5GQjoCM,1391
|
|
7
|
-
sopro/model.py,sha256=
|
|
7
|
+
sopro/model.py,sha256=Rj10OPdx8UEhH2reU2SQ4oTztNJCOvLS7pm84V0E_xo,28631
|
|
8
8
|
sopro/sampling.py,sha256=Q5rbuef_BIuy12cv5J7v6k9ob3zQ0OFJIlMHssOkiuU,2951
|
|
9
|
-
sopro/streaming.py,sha256=
|
|
9
|
+
sopro/streaming.py,sha256=AgPzaxdimeDT-8potXAMXuRi7zrWUGgxViwHJw2R2Lk,4998
|
|
10
10
|
sopro/tokenizer.py,sha256=ucb86Jr-EaAyD9OHDoCmwB9Nh9AFIZK_TlZmMkv46KQ,1325
|
|
11
11
|
sopro/codec/__init__.py,sha256=6D6Q0M-SUZZnq79OT1nATenEc8zIZDrhZBpm7zdPEE4,129
|
|
12
12
|
sopro/codec/mimi.py,sha256=RNKnXfhWXUqHiU27C90wj18Rb3R2IZHpm5_cS_XAs9Y,5798
|
|
@@ -15,9 +15,9 @@ sopro/nn/blocks.py,sha256=zDEVUH2LXapXuQ4DyhplNh1I0iJYrNUL20IxHoz8ucs,3221
|
|
|
15
15
|
sopro/nn/embeddings.py,sha256=7YfYKj1v1oafTV4-iucJG4fmeT43fP_rQiJ6ACRKPNI,3185
|
|
16
16
|
sopro/nn/speaker.py,sha256=L2bs-bPlyxoWZyMTctBBuMTaEWm6FP7K1udrXehnTGM,2964
|
|
17
17
|
sopro/nn/xattn.py,sha256=OeRo1HbRZs0AkQ6AV6Q8cqYZP9K4vI-IwT3uVn9jOqg,2939
|
|
18
|
-
sopro-1.0.
|
|
19
|
-
sopro-1.0.
|
|
20
|
-
sopro-1.0.
|
|
21
|
-
sopro-1.0.
|
|
22
|
-
sopro-1.0.
|
|
23
|
-
sopro-1.0.
|
|
18
|
+
sopro-1.0.2.dist-info/licenses/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
19
|
+
sopro-1.0.2.dist-info/METADATA,sha256=LPMr5tnwQx3Rq5FX9CCMq6s4IvreA-EWQvt-OzQkm7g,6470
|
|
20
|
+
sopro-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
21
|
+
sopro-1.0.2.dist-info/entry_points.txt,sha256=OWcKgC5Syk8rzOhNzTZ3QR5GJEG88UfiShkovrwb2cI,44
|
|
22
|
+
sopro-1.0.2.dist-info/top_level.txt,sha256=Tik26_lEwzSKDuwQdqwoqA_O0b7CDATzousa0Q17PBo,6
|
|
23
|
+
sopro-1.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|