sopro 1.0.0.tar.gz → 1.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {sopro-1.0.0 → sopro-1.0.2}/PKG-INFO +2 -2
  2. {sopro-1.0.0 → sopro-1.0.2}/README.md +1 -1
  3. {sopro-1.0.0 → sopro-1.0.2}/pyproject.toml +1 -1
  4. {sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/PKG-INFO +2 -2
  5. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/__init__.py +1 -1
  6. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/cli.py +70 -69
  7. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/model.py +1 -1
  8. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/streaming.py +1 -1
  9. {sopro-1.0.0 → sopro-1.0.2}/LICENSE.txt +0 -0
  10. {sopro-1.0.0 → sopro-1.0.2}/setup.cfg +0 -0
  11. {sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/SOURCES.txt +0 -0
  12. {sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/dependency_links.txt +0 -0
  13. {sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/entry_points.txt +0 -0
  14. {sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/requires.txt +0 -0
  15. {sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/top_level.txt +0 -0
  16. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/audio.py +0 -0
  17. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/codec/__init__.py +0 -0
  18. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/codec/mimi.py +0 -0
  19. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/config.py +0 -0
  20. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/constants.py +0 -0
  21. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/hub.py +0 -0
  22. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/nn/__init__.py +0 -0
  23. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/nn/blocks.py +0 -0
  24. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/nn/embeddings.py +0 -0
  25. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/nn/speaker.py +0 -0
  26. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/nn/xattn.py +0 -0
  27. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/sampling.py +0 -0
  28. {sopro-1.0.0 → sopro-1.0.2}/src/sopro/tokenizer.py +0 -0
{sopro-1.0.0 → sopro-1.0.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sopro
-Version: 1.0.0
+Version: 1.0.2
 Summary: A lightweight text-to-speech model with zero-shot voice cloning.
 Author-email: Samuel Vitorino <samvitorino@gmail.com>
 License: Apache 2.0
@@ -53,7 +53,7 @@ conda activate soprotts
 ### From PyPI
 
 ```bash
-pip install sopro-tts
+pip install sopro
 ```
 
 ### From the repo
{sopro-1.0.0 → sopro-1.0.2}/README.md
@@ -30,7 +30,7 @@ conda activate soprotts
 ### From PyPI
 
 ```bash
-pip install sopro-tts
+pip install sopro
 ```
 
 ### From the repo
{sopro-1.0.0 → sopro-1.0.2}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "sopro"
-version = "1.0.0"
+version = "1.0.2"
 description = "A lightweight text-to-speech model with zero-shot voice cloning."
 readme = "README.md"
 requires-python = ">=3.9"
{sopro-1.0.0 → sopro-1.0.2}/src/Sopro.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sopro
-Version: 1.0.0
+Version: 1.0.2
 Summary: A lightweight text-to-speech model with zero-shot voice cloning.
 Author-email: Samuel Vitorino <samvitorino@gmail.com>
 License: Apache 2.0
@@ -53,7 +53,7 @@ conda activate soprotts
 ### From PyPI
 
 ```bash
-pip install sopro-tts
+pip install sopro
 ```
 
 ### From the repo
{sopro-1.0.0 → sopro-1.0.2}/src/sopro/__init__.py
@@ -3,4 +3,4 @@ from __future__ import annotations
 from .model import SoproTTS
 
 __all__ = ["SoproTTS"]
-__version__ = "1.0.0"
+__version__ = "1.0.2"
{sopro-1.0.0 → sopro-1.0.2}/src/sopro/cli.py
@@ -97,79 +97,80 @@ def main() -> None:
         arr = np.load(args.ref_tokens)
         ref_tokens_tq = torch.from_numpy(arr).long()
 
-    text_ids = tts.encode_text(args.text)
-    ref = tts.encode_reference(
-        ref_audio_path=args.ref_audio,
-        ref_tokens_tq=ref_tokens_tq,
-        ref_seconds=args.ref_seconds,
-    )
+    with torch.inference_mode():
+        text_ids = tts.encode_text(args.text)
+        ref = tts.encode_reference(
+            ref_audio_path=args.ref_audio,
+            ref_tokens_tq=ref_tokens_tq,
+            ref_seconds=args.ref_seconds,
+        )
 
-    prep = tts.model.prepare_conditioning(
-        text_ids,
-        ref,
-        max_frames=args.max_frames,
-        device=tts.device,
-        style_strength=float(
-            args.style_strength
-            if args.style_strength is not None
-            else cfg.style_strength
-        ),
-    )
+        prep = tts.model.prepare_conditioning(
+            text_ids,
+            ref,
+            max_frames=args.max_frames,
+            device=tts.device,
+            style_strength=float(
+                args.style_strength
+                if args.style_strength is not None
+                else cfg.style_strength
+            ),
+        )
 
-    t_start = time.perf_counter()
+        t_start = time.perf_counter()
 
-    hist_A: list[int] = []
-    pbar = tqdm(
-        total=args.max_frames,
-        desc="AR sampling",
-        unit="frame",
-        disable=args.quiet,
-    )
+        hist_A: list[int] = []
+        pbar = tqdm(
+            total=args.max_frames,
+            desc="AR sampling",
+            unit="frame",
+            disable=args.quiet,
+        )
 
-    for _t, rvq1, p_stop in tts.model.ar_stream(
-        prep,
-        max_frames=args.max_frames,
-        top_p=args.top_p,
-        temperature=args.temperature,
-        anti_loop=(not args.no_anti_loop),
-        use_prefix=(not args.no_prefix),
-        prefix_sec_fixed=args.prefix_sec,
-        use_stop_head=(False if args.no_stop_head else None),
-        stop_patience=args.stop_patience,
-        stop_threshold=args.stop_threshold,
-    ):
-        hist_A.append(int(rvq1))
-        pbar.update(1)
-        if p_stop is None:
-            pbar.set_postfix(p_stop="off")
-        else:
-            pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
-
-    pbar.n = len(hist_A)
-    pbar.close()
-
-    t_after_sampling = time.perf_counter()
-
-    T = len(hist_A)
-    if T == 0:
-        save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
-        t_end = time.perf_counter()
-        if not args.quiet:
-            print(
-                f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
-                f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
-                f"total={t_end - t_start:.2f}s"
-            )
-        print(f"[Done] Wrote {args.out}")
-        return
-
-    tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
-    cond_seq = prep["cond_all"][:, :T, :]
-    tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
-    tokens_tq = tokens_1xTQ.squeeze(0)
-
-    wav = tts.codec.decode_full(tokens_tq)
-    save_audio(args.out, wav, sr=TARGET_SR)
+        for _t, rvq1, p_stop in tts.model.ar_stream(
+            prep,
+            max_frames=args.max_frames,
+            top_p=args.top_p,
+            temperature=args.temperature,
+            anti_loop=(not args.no_anti_loop),
+            use_prefix=(not args.no_prefix),
+            prefix_sec_fixed=args.prefix_sec,
+            use_stop_head=(False if args.no_stop_head else None),
+            stop_patience=args.stop_patience,
+            stop_threshold=args.stop_threshold,
+        ):
+            hist_A.append(int(rvq1))
+            pbar.update(1)
+            if p_stop is None:
+                pbar.set_postfix(p_stop="off")
+            else:
+                pbar.set_postfix(p_stop=f"{float(p_stop):.2f}")
+
+        pbar.n = len(hist_A)
+        pbar.close()
+
+        t_after_sampling = time.perf_counter()
+
+        T = len(hist_A)
+        if T == 0:
+            save_audio(args.out, torch.zeros(1, 0), sr=TARGET_SR)
+            t_end = time.perf_counter()
+            if not args.quiet:
+                print(
+                    f"[Timing] sampling={t_after_sampling - t_start:.2f}s, "
+                    f"postproc+decode+save={t_end - t_after_sampling:.2f}s, "
+                    f"total={t_end - t_start:.2f}s"
+                )
+            print(f"[Done] Wrote {args.out}")
+            return
+
+        tokens_A = torch.tensor(hist_A, device=tts.device, dtype=torch.long).unsqueeze(0)
+        cond_seq = prep["cond_all"][:, :T, :]
+        tokens_1xTQ = tts.model.nar_refine(cond_seq, tokens_A)
+        tokens_tq = tokens_1xTQ.squeeze(0)
+
+        wav = tts.codec.decode_full(tokens_tq)
+        save_audio(args.out, wav, sr=TARGET_SR)
 
     t_end = time.perf_counter()
     if not args.quiet:
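The only behavioral change in cli.py is that text encoding, conditioning, AR sampling, NAR refinement, decoding, and saving now run inside a single `torch.inference_mode()` block. A minimal sketch of what that context manager does, using a throwaway `nn.Linear` as a placeholder rather than Sopro's actual pipeline:

```python
import torch
import torch.nn as nn

# Placeholder module standing in for the TTS pipeline (not part of sopro).
model = nn.Linear(16, 8).eval()

# inference_mode() disables gradient tracking *and* autograd's view/version
# bookkeeping, so forward passes are somewhat cheaper than under no_grad().
with torch.inference_mode():
    x = torch.randn(1, 16)
    y = model(x)

print(y.requires_grad)   # False
print(y.is_inference())  # True: y cannot be used in a later autograd graph
```

The trade-off is that tensors created inside the block are permanently marked as inference tensors, which is acceptable for a CLI that only writes audio to disk.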
{sopro-1.0.0 → sopro-1.0.2}/src/sopro/model.py
@@ -793,7 +793,7 @@ class SoproTTS:
         )
         return ref
 
-    @torch.no_grad()
+    @torch.inference_mode()
     def synthesize(
         self,
         text: str,
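The same switch in decorator form: `@torch.inference_mode()` replaces `@torch.no_grad()` on the inference-only `synthesize` entry point. A hedged sketch with a dummy class (not the real `SoproTTS` API):

```python
import torch
import torch.nn as nn

class DummyTTS:
    """Stand-in for a model exposing an inference-only entry point."""

    def __init__(self) -> None:
        self.net = nn.Linear(4, 4).eval()

    # Drop-in replacement for @torch.no_grad(); outputs are inference tensors.
    @torch.inference_mode()
    def synthesize(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)

out = DummyTTS().synthesize(torch.randn(2, 4))
assert not out.requires_grad and out.is_inference()
```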
{sopro-1.0.0 → sopro-1.0.2}/src/sopro/streaming.py
@@ -145,7 +145,7 @@ class SoproTTSStreamer:
             if wav is not None:
                 yield wav
 
-
+@torch.inference_mode()
 def stream(
     tts: SoproTTS,
     text: str,