sopro 1.0.1__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {sopro-1.0.1 → sopro-1.0.2}/PKG-INFO +1 -1
  2. {sopro-1.0.1 → sopro-1.0.2}/pyproject.toml +1 -1
  3. {sopro-1.0.1 → sopro-1.0.2}/src/Sopro.egg-info/PKG-INFO +1 -1
  4. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/__init__.py +1 -1
  5. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/cli.py +70 -69
  6. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/model.py +1 -1
  7. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/streaming.py +1 -1
  8. {sopro-1.0.1 → sopro-1.0.2}/LICENSE.txt +0 -0
  9. {sopro-1.0.1 → sopro-1.0.2}/README.md +0 -0
  10. {sopro-1.0.1 → sopro-1.0.2}/setup.cfg +0 -0
  11. {sopro-1.0.1 → sopro-1.0.2}/src/Sopro.egg-info/SOURCES.txt +0 -0
  12. {sopro-1.0.1 → sopro-1.0.2}/src/Sopro.egg-info/dependency_links.txt +0 -0
  13. {sopro-1.0.1 → sopro-1.0.2}/src/Sopro.egg-info/entry_points.txt +0 -0
  14. {sopro-1.0.1 → sopro-1.0.2}/src/Sopro.egg-info/requires.txt +0 -0
  15. {sopro-1.0.1 → sopro-1.0.2}/src/Sopro.egg-info/top_level.txt +0 -0
  16. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/audio.py +0 -0
  17. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/codec/__init__.py +0 -0
  18. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/codec/mimi.py +0 -0
  19. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/config.py +0 -0
  20. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/constants.py +0 -0
  21. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/hub.py +0 -0
  22. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/nn/__init__.py +0 -0
  23. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/nn/blocks.py +0 -0
  24. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/nn/embeddings.py +0 -0
  25. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/nn/speaker.py +0 -0
  26. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/nn/xattn.py +0 -0
  27. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/sampling.py +0 -0
  28. {sopro-1.0.1 → sopro-1.0.2}/src/sopro/tokenizer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sopro
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: A lightweight text-to-speech model with zero-shot voice cloning.
5
5
  Author-email: Samuel Vitorino <samvitorino@gmail.com>
6
6
  License: Apache 2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sopro"
7
- version = "1.0.1"
7
+ version = "1.0.2"
8
8
  description = "A lightweight text-to-speech model with zero-shot voice cloning."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sopro
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: A lightweight text-to-speech model with zero-shot voice cloning.
5
5
  Author-email: Samuel Vitorino <samvitorino@gmail.com>
6
6
  License: Apache 2.0
@@ -3,4 +3,4 @@ from __future__ import annotations
3
3
  from .model import SoproTTS
4
4
 
5
5
  __all__ = ["SoproTTS"]
6
- __version__ = "1.0.1"
6
+ __version__ = "1.0.2"
@@ -97,79 +97,80 @@ def main() -> None:
97
97
  arr = np.load(args.ref_tokens)
98
98
  ref_tokens_tq = torch.from_numpy(arr).long()
99
99
 
100
- text_ids = tts.encode_text(args.text)
101
- ref = tts.encode_reference(
102
- ref_audio_path=args.ref_audio,
103
- ref_tokens_tq=ref_tokens_tq,
104
- ref_seconds=args.ref_seconds,
105
- )
100
+ with torch.inference_mode():
101
+ text_ids = tts.encode_text(args.text)
102
+ ref = tts.encode_reference(
103
+ ref_audio_path=args.ref_audio,
104
+ ref_tokens_tq=ref_tokens_tq,
105
+ ref_seconds=args.ref_seconds,
106
+ )
106
107
 
107
- prep = tts.model.prepare_conditioning(
108
- text_ids,
109
- ref,
110
- max_frames=args.max_frames,
111
- device=tts.device,
112
- style_strength=float(
113
- args.style_strength
114
- if args.style_strength is not None
115
- else cfg.style_strength
116
- ),
117
- )
108
+ prep = tts.model.prepare_conditioning(
109
+ text_ids,
110
+ ref,
111
+ max_frames=args.max_frames,
112
+ device=tts.device,
113
+ style_strength=float(
114
+ args.style_strength
115
+ if args.style_strength is not None
116
+ else cfg.style_strength
117
+ ),
118
+ )
118
119
 
119
- t_start = time.perf_counter()
120
+ t_start = time.perf_counter()
120
121
 
121
- hist_A: list[int] = []
122
- pbar = tqdm(
123
- total=args.max_frames,
124
- desc="AR sampling",
125
- unit="frame",
126
- disable=args.quiet,
127
- )
122
+ hist_A: list[int] = []
123
+ pbar = tqdm(
124
+ total=args.max_frames,
125
+ desc="AR sampling",
126
+ unit="frame",
127
+ disable=args.quiet,
128
+ )
128
129
 
129
- for _t, rvq1, p_stop in tts.model.ar_stream(
130
- prep,
131
- max_frames=args.max_frames,
132
- top_p=args.top_p,
133
- temperature=args.temperature,
134
- anti_loop=(not args.no_anti_loop),
135
- use_prefix=(not args.no_prefix),
136
- prefix_sec_fixed=args.prefix_sec,
137
- use_stop_head=(False if args.no_stop_head else None),
138
- stop_patience=args.stop_patience,
139
- stop_threshold=args.stop_threshold,
140
- ):
141
- hist_A.append(int(rvq1))
142
- pbar.update(1)
143
- if p_stop is None:
144
- pbar.set_postfix(p_stop="off")
145
- else:
146
- pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
147
-
148
- pbar.n = len(hist_A)
149
- pbar.close()
150
-
151
- t_after_sampling = time.perf_counter()
152
-
153
- T = len(hist_A)
154
- if T == 0:
155
- save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
156
- t_end = time.perf_counter()
157
- if not args.quiet:
158
- print(
159
- f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
160
- f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
161
- f"total={t_end - t_start:.2f}s"
162
- )
163
- print(f"[Done] Wrote {args.out}")
164
- return
165
-
166
- tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
167
- cond_seq = prep["cond_all"][:, :T, :]
168
- tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
169
- tokens_tq = tokens_1xTQ.squeeze(0)
170
-
171
- wav = tts.codec.decode_full(tokens_tq)
172
- save_audio(args.out, wav, sr=TARGET_SR)
130
+ for _t, rvq1, p_stop in tts.model.ar_stream(
131
+ prep,
132
+ max_frames=args.max_frames,
133
+ top_p=args.top_p,
134
+ temperature=args.temperature,
135
+ anti_loop=(not args.no_anti_loop),
136
+ use_prefix=(not args.no_prefix),
137
+ prefix_sec_fixed=args.prefix_sec,
138
+ use_stop_head=(False if args.no_stop_head else None),
139
+ stop_patience=args.stop_patience,
140
+ stop_threshold=args.stop_threshold,
141
+ ):
142
+ hist_A.append(int(rvq1))
143
+ pbar.update(1)
144
+ if p_stop is None:
145
+ pbar.set_postfix(p_stop="off")
146
+ else:
147
+ pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
148
+
149
+ pbar.n = len(hist_A)
150
+ pbar.close()
151
+
152
+ t_after_sampling = time.perf_counter()
153
+
154
+ T = len(hist_A)
155
+ if T == 0:
156
+ save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
157
+ t_end = time.perf_counter()
158
+ if not args.quiet:
159
+ print(
160
+ f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
161
+ f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
162
+ f"total={t_end - t_start:.2f}s"
163
+ )
164
+ print(f"[Done] Wrote {args.out}")
165
+ return
166
+
167
+ tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
168
+ cond_seq = prep["cond_all"][:, :T, :]
169
+ tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
170
+ tokens_tq = tokens_1xTQ.squeeze(0)
171
+
172
+ wav = tts.codec.decode_full(tokens_tq)
173
+ save_audio(args.out, wav, sr=TARGET_SR)
173
174
 
174
175
  t_end = time.perf_counter()
175
176
  if not args.quiet:
@@ -793,7 +793,7 @@ class SoproTTS:
793
793
  )
794
794
  return ref
795
795
 
796
- @torch.no_grad()
796
+ @torch.inference_mode()
797
797
  def synthesize(
798
798
  self,
799
799
  text: str,
@@ -145,7 +145,7 @@ class SoproTTSStreamer:
145
145
  if wav is not None:
146
146
  yield wav
147
147
 
148
-
148
+ @torch.inference_mode()
149
149
  def stream(
150
150
  tts: SoproTTS,
151
151
  text: str,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes