sopro 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sopro/__init__.py CHANGED
@@ -3,4 +3,4 @@ from __future__ import annotations
3
3
  from .model import SoproTTS
4
4
 
5
5
  __all__ = ["SoproTTS"]
6
- __version__ = "1.0.1"
6
+ __version__ = "1.0.2"
sopro/cli.py CHANGED
@@ -97,79 +97,80 @@ def main() -> None:
97
97
  arr = np.load(args.ref_tokens)
98
98
  ref_tokens_tq = torch.from_numpy(arr).long()
99
99
 
100
- text_ids = tts.encode_text(args.text)
101
- ref = tts.encode_reference(
102
- ref_audio_path=args.ref_audio,
103
- ref_tokens_tq=ref_tokens_tq,
104
- ref_seconds=args.ref_seconds,
105
- )
100
+ with torch.inference_mode():
101
+ text_ids = tts.encode_text(args.text)
102
+ ref = tts.encode_reference(
103
+ ref_audio_path=args.ref_audio,
104
+ ref_tokens_tq=ref_tokens_tq,
105
+ ref_seconds=args.ref_seconds,
106
+ )
106
107
 
107
- prep = tts.model.prepare_conditioning(
108
- text_ids,
109
- ref,
110
- max_frames=args.max_frames,
111
- device=tts.device,
112
- style_strength=float(
113
- args.style_strength
114
- if args.style_strength is not None
115
- else cfg.style_strength
116
- ),
117
- )
108
+ prep = tts.model.prepare_conditioning(
109
+ text_ids,
110
+ ref,
111
+ max_frames=args.max_frames,
112
+ device=tts.device,
113
+ style_strength=float(
114
+ args.style_strength
115
+ if args.style_strength is not None
116
+ else cfg.style_strength
117
+ ),
118
+ )
118
119
 
119
- t_start = time.perf_counter()
120
+ t_start = time.perf_counter()
120
121
 
121
- hist_A: list[int] = []
122
- pbar = tqdm(
123
- total=args.max_frames,
124
- desc="AR sampling",
125
- unit="frame",
126
- disable=args.quiet,
127
- )
122
+ hist_A: list[int] = []
123
+ pbar = tqdm(
124
+ total=args.max_frames,
125
+ desc="AR sampling",
126
+ unit="frame",
127
+ disable=args.quiet,
128
+ )
128
129
 
129
- for _t, rvq1, p_stop in tts.model.ar_stream(
130
- prep,
131
- max_frames=args.max_frames,
132
- top_p=args.top_p,
133
- temperature=args.temperature,
134
- anti_loop=(not args.no_anti_loop),
135
- use_prefix=(not args.no_prefix),
136
- prefix_sec_fixed=args.prefix_sec,
137
- use_stop_head=(False if args.no_stop_head else None),
138
- stop_patience=args.stop_patience,
139
- stop_threshold=args.stop_threshold,
140
- ):
141
- hist_A.append(int(rvq1))
142
- pbar.update(1)
143
- if p_stop is None:
144
- pbar.set_postfix(p_stop="off")
145
- else:
146
- pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
147
-
148
- pbar.n = len(hist_A)
149
- pbar.close()
150
-
151
- t_after_sampling = time.perf_counter()
152
-
153
- T = len(hist_A)
154
- if T == 0:
155
- save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
156
- t_end = time.perf_counter()
157
- if not args.quiet:
158
- print(
159
- f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
160
- f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
161
- f"total={t_end - t_start:.2f}s"
162
- )
163
- print(f"[Done] Wrote {args.out}")
164
- return
165
-
166
- tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
167
- cond_seq = prep["cond_all"][:, :T, :]
168
- tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
169
- tokens_tq = tokens_1xTQ.squeeze(0)
170
-
171
- wav = tts.codec.decode_full(tokens_tq)
172
- save_audio(args.out, wav, sr=TARGET_SR)
130
+ for _t, rvq1, p_stop in tts.model.ar_stream(
131
+ prep,
132
+ max_frames=args.max_frames,
133
+ top_p=args.top_p,
134
+ temperature=args.temperature,
135
+ anti_loop=(not args.no_anti_loop),
136
+ use_prefix=(not args.no_prefix),
137
+ prefix_sec_fixed=args.prefix_sec,
138
+ use_stop_head=(False if args.no_stop_head else None),
139
+ stop_patience=args.stop_patience,
140
+ stop_threshold=args.stop_threshold,
141
+ ):
142
+ hist_A.append(int(rvq1))
143
+ pbar.update(1)
144
+ if p_stop is None:
145
+ pbar.set_postfix(p_stop="off")
146
+ else:
147
+ pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
148
+
149
+ pbar.n = len(hist_A)
150
+ pbar.close()
151
+
152
+ t_after_sampling = time.perf_counter()
153
+
154
+ T = len(hist_A)
155
+ if T == 0:
156
+ save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
157
+ t_end = time.perf_counter()
158
+ if not args.quiet:
159
+ print(
160
+ f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
161
+ f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
162
+ f"total={t_end - t_start:.2f}s"
163
+ )
164
+ print(f"[Done] Wrote {args.out}")
165
+ return
166
+
167
+ tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
168
+ cond_seq = prep["cond_all"][:, :T, :]
169
+ tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
170
+ tokens_tq = tokens_1xTQ.squeeze(0)
171
+
172
+ wav = tts.codec.decode_full(tokens_tq)
173
+ save_audio(args.out, wav, sr=TARGET_SR)
173
174
 
174
175
  t_end = time.perf_counter()
175
176
  if not args.quiet:
sopro/model.py CHANGED
@@ -793,7 +793,7 @@ class SoproTTS:
793
793
  )
794
794
  return ref
795
795
 
796
- @torch.no_grad()
796
+ @torch.inference_mode()
797
797
  def synthesize(
798
798
  self,
799
799
  text: str,
sopro/streaming.py CHANGED
@@ -145,7 +145,7 @@ class SoproTTSStreamer:
145
145
  if wav is not None:
146
146
  yield wav
147
147
 
148
-
148
+ @torch.inference_mode()
149
149
  def stream(
150
150
  tts: SoproTTS,
151
151
  text: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sopro
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: A lightweight text-to-speech model with zero-shot voice cloning.
5
5
  Author-email: Samuel Vitorino <samvitorino@gmail.com>
6
6
  License: Apache 2.0
@@ -1,12 +1,12 @@
1
- sopro/__init__.py,sha256=NFZuESqdCL7bGXuTB8c61XxUJqhkHPUOSTqzH4pyUfU,110
1
+ sopro/__init__.py,sha256=SreucP3h4V4KsUU8PskOofqKEAmL8RvXYA6Ma53zb8Y,110
2
2
  sopro/audio.py,sha256=xlp6aYzzGlOMcNZ-p9lDeeU0TUkSHMcvmLantwg_4-0,4162
3
- sopro/cli.py,sha256=YKfGalyhbRuvjVrGJuo1NlIC7h8CszlMxuTwhYgUSwQ,5751
3
+ sopro/cli.py,sha256=v4mZ_zfYt6BusVFMIHo3ae32xLw2_O_v5SDkfmt_Cvc,6040
4
4
  sopro/config.py,sha256=OBD-k2z5GUdjFS545MyBXx-dAGhwnhRG11LW-zQt1-g,1063
5
5
  sopro/constants.py,sha256=wSjFKeFIcLCxyVUVb3njxMK666IuxjlNzVT4_jfPovQ,97
6
6
  sopro/hub.py,sha256=xsHfeO8X7v__FELvaQxWHYG8P39ygrgbluPs5GQjoCM,1391
7
- sopro/model.py,sha256=YXwcVGN3v5T0kvKttmo9WNPpewF-b5aOZoTMVypkzO8,28624
7
+ sopro/model.py,sha256=Rj10OPdx8UEhH2reU2SQ4oTztNJCOvLS7pm84V0E_xo,28631
8
8
  sopro/sampling.py,sha256=Q5rbuef_BIuy12cv5J7v6k9ob3zQ0OFJIlMHssOkiuU,2951
9
- sopro/streaming.py,sha256=O5Kkl4cUBjzgjTrEwQK2ka5h6sgcYaEZmIp66-obcPM,4975
9
+ sopro/streaming.py,sha256=AgPzaxdimeDT-8potXAMXuRi7zrWUGgxViwHJw2R2Lk,4998
10
10
  sopro/tokenizer.py,sha256=ucb86Jr-EaAyD9OHDoCmwB9Nh9AFIZK_TlZmMkv46KQ,1325
11
11
  sopro/codec/__init__.py,sha256=6D6Q0M-SUZZnq79OT1nATenEc8zIZDrhZBpm7zdPEE4,129
12
12
  sopro/codec/mimi.py,sha256=RNKnXfhWXUqHiU27C90wj18Rb3R2IZHpm5_cS_XAs9Y,5798
@@ -15,9 +15,9 @@ sopro/nn/blocks.py,sha256=zDEVUH2LXapXuQ4DyhplNh1I0iJYrNUL20IxHoz8ucs,3221
15
15
  sopro/nn/embeddings.py,sha256=7YfYKj1v1oafTV4-iucJG4fmeT43fP_rQiJ6ACRKPNI,3185
16
16
  sopro/nn/speaker.py,sha256=L2bs-bPlyxoWZyMTctBBuMTaEWm6FP7K1udrXehnTGM,2964
17
17
  sopro/nn/xattn.py,sha256=OeRo1HbRZs0AkQ6AV6Q8cqYZP9K4vI-IwT3uVn9jOqg,2939
18
- sopro-1.0.1.dist-info/licenses/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
- sopro-1.0.1.dist-info/METADATA,sha256=tlq9mTTsNEFgMyCtle7om5hqKRm5LwrVCFLo4olQ3_s,6470
20
- sopro-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- sopro-1.0.1.dist-info/entry_points.txt,sha256=OWcKgC5Syk8rzOhNzTZ3QR5GJEG88UfiShkovrwb2cI,44
22
- sopro-1.0.1.dist-info/top_level.txt,sha256=Tik26_lEwzSKDuwQdqwoqA_O0b7CDATzousa0Q17PBo,6
23
- sopro-1.0.1.dist-info/RECORD,,
18
+ sopro-1.0.2.dist-info/licenses/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
+ sopro-1.0.2.dist-info/METADATA,sha256=LPMr5tnwQx3Rq5FX9CCMq6s4IvreA-EWQvt-OzQkm7g,6470
20
+ sopro-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ sopro-1.0.2.dist-info/entry_points.txt,sha256=OWcKgC5Syk8rzOhNzTZ3QR5GJEG88UfiShkovrwb2cI,44
22
+ sopro-1.0.2.dist-info/top_level.txt,sha256=Tik26_lEwzSKDuwQdqwoqA_O0b7CDATzousa0Q17PBo,6
23
+ sopro-1.0.2.dist-info/RECORD,,
File without changes