typecast-python 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,8 @@
10
10
  .env.*.local
11
11
  *.env
12
12
  !.env.example
13
+ config.yaml
14
+ credentials.json
13
15
 
14
16
  # ----------------
15
17
  # Node.js / JavaScript
@@ -87,6 +89,12 @@ Makefile
87
89
  !typecast-java/Makefile
88
90
  !typecast-rust/Makefile
89
91
  !typecast-kotlin/Makefile
92
+ !typecast-dart/Makefile
93
+ !typecast-dart/lib/
94
+ !typecast-dart/lib/**
95
+ !typecast-ruby/Makefile
96
+ !typecast-ruby/lib/
97
+ !typecast-ruby/lib/**
90
98
  *.cmake
91
99
 
92
100
  # Compiled objects
@@ -231,6 +239,13 @@ out/
231
239
  Cargo.lock
232
240
  *~
233
241
 
242
+ # ----------------
243
+ # Ruby
244
+ # ----------------
245
+ .bundle/
246
+ Gemfile.lock
247
+ vendor/bundle/
248
+
234
249
  # ----------------
235
250
  # Swift
236
251
  # ----------------
@@ -280,6 +295,12 @@ fastlane/test_output
280
295
  # Code Injection
281
296
  iOSInjectionProject/
282
297
 
298
+ # ----------------
299
+ # Dart
300
+ # ----------------
301
+ .dart_tool/
302
+ pubspec.lock
303
+
283
304
  # ----------------
284
305
  # IDE / Editor
285
306
  # ----------------
@@ -314,6 +335,9 @@ logs/
314
335
  # Audio output files (in root directories, not source code)
315
336
  *.wav
316
337
  *.mp3
338
+ # Exception: shared test fixtures
339
+ !test-fixtures/**/*.wav
340
+ !test-fixtures/**/*.mp3
317
341
  # Note: Removed 'output.*' pattern as it incorrectly matches Output.java/Output.cs source files
318
342
 
319
343
  # ----------------
@@ -321,4 +345,4 @@ logs/
321
345
  # ----------------
322
346
  .npmrc
323
347
  openapi.json
324
- PR.md
348
+ PR.md
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: typecast-python
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Official Typecast Python SDK - Convert text to lifelike speech using AI-powered voices
5
5
  Project-URL: Homepage, https://typecast.ai
6
6
  Project-URL: Documentation, https://typecast.ai/docs/overview
@@ -223,7 +223,7 @@ Classifier: Programming Language :: Python :: 3.13
223
223
  Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
224
224
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
225
225
  Requires-Python: >=3.11
226
- Requires-Dist: aiohttp>=3.8.0
226
+ Requires-Dist: aiohttp>=3.14.0
227
227
  Requires-Dist: pydantic>=2.0.0
228
228
  Requires-Dist: requests>=2.28.0
229
229
  Requires-Dist: typing-extensions>=4.0.0
@@ -504,6 +504,53 @@ the two alignment arrays. For non-whitespace languages (Japanese,
504
504
  Chinese), pair with `granularity="char"` — word-level alignment will
505
505
  collapse the entire sentence into a single segment.
506
506
 
507
+ ### Instant cloning
508
+
509
+ Clone a custom voice from a short audio sample (≤ 25 MB), then use it just like any built-in voice. The cloned voice ID has a `uc_` prefix and works with `text_to_speech` directly.
510
+
511
+ ```python
512
+ from typecast import Typecast
513
+ from typecast.models import TTSRequest
514
+
515
+ client = Typecast(api_key="YOUR_API_KEY")
516
+
517
+ # 1) Clone
518
+ voice = client.clone_voice(
519
+ audio="path/to/sample.wav", # str path | Path | bytes | file object
520
+ name="my-voice", # 1-30 chars
521
+ model="ssfm-v30", # or "ssfm-v21"
522
+ )
523
+ print(voice.voice_id) # uc_64a1b2...
524
+
525
+ # 2) Synthesize with the cloned voice
526
+ audio = client.text_to_speech(TTSRequest(
527
+ text="Hello from my cloned voice!",
528
+ voice_id=voice.voice_id,
529
+ model="ssfm-v30",
530
+ ))
531
+ with open("output.wav", "wb") as f:
532
+ f.write(audio.audio_data)
533
+
534
+ # 3) Delete when done
535
+ client.delete_voice(voice.voice_id)
536
+ ```
537
+
538
+ **Limits**
539
+
540
+ - Audio file: max 25 MB. Supported formats: WAV, MP3.
541
+ - Voice name: 1–30 characters.
542
+ - Model: `ssfm-v21` or `ssfm-v30`.
543
+
544
+ **Async usage** is identical via `AsyncTypecast`:
545
+
546
+ ```python
547
+ from typecast import AsyncTypecast
548
+
549
+ async with AsyncTypecast(api_key="YOUR_API_KEY") as client:
550
+ voice = await client.clone_voice(audio="sample.wav", name="my-voice", model="ssfm-v30")
551
+ await client.delete_voice(voice.voice_id)
552
+ ```
553
+
507
554
  ---
508
555
 
509
556
  ## Supported Languages
@@ -263,6 +263,53 @@ the two alignment arrays. For non-whitespace languages (Japanese,
263
263
  Chinese), pair with `granularity="char"` — word-level alignment will
264
264
  collapse the entire sentence into a single segment.
265
265
 
266
+ ### Instant cloning
267
+
268
+ Clone a custom voice from a short audio sample (≤ 25 MB), then use it just like any built-in voice. The cloned voice ID has a `uc_` prefix and works with `text_to_speech` directly.
269
+
270
+ ```python
271
+ from typecast import Typecast
272
+ from typecast.models import TTSRequest
273
+
274
+ client = Typecast(api_key="YOUR_API_KEY")
275
+
276
+ # 1) Clone
277
+ voice = client.clone_voice(
278
+ audio="path/to/sample.wav", # str path | Path | bytes | file object
279
+ name="my-voice", # 1-30 chars
280
+ model="ssfm-v30", # or "ssfm-v21"
281
+ )
282
+ print(voice.voice_id) # uc_64a1b2...
283
+
284
+ # 2) Synthesize with the cloned voice
285
+ audio = client.text_to_speech(TTSRequest(
286
+ text="Hello from my cloned voice!",
287
+ voice_id=voice.voice_id,
288
+ model="ssfm-v30",
289
+ ))
290
+ with open("output.wav", "wb") as f:
291
+ f.write(audio.audio_data)
292
+
293
+ # 3) Delete when done
294
+ client.delete_voice(voice.voice_id)
295
+ ```
296
+
297
+ **Limits**
298
+
299
+ - Audio file: max 25 MB. Supported formats: WAV, MP3.
300
+ - Voice name: 1–30 characters.
301
+ - Model: `ssfm-v21` or `ssfm-v30`.
302
+
303
+ **Async usage** is identical via `AsyncTypecast`:
304
+
305
+ ```python
306
+ from typecast import AsyncTypecast
307
+
308
+ async with AsyncTypecast(api_key="YOUR_API_KEY") as client:
309
+ voice = await client.clone_voice(audio="sample.wav", name="my-voice", model="ssfm-v30")
310
+ await client.delete_voice(voice.voice_id)
311
+ ```
312
+
266
313
  ---
267
314
 
268
315
  ## Supported Languages
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "typecast-python"
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  description = "Official Typecast Python SDK - Convert text to lifelike speech using AI-powered voices"
9
9
  authors = [
10
10
  {name = "Neosapience", email = "help@typecast.ai"}
@@ -29,7 +29,7 @@ classifiers = [
29
29
  "Operating System :: OS Independent",
30
30
  ]
31
31
  dependencies = [
32
- "aiohttp>=3.8.0",
32
+ "aiohttp>=3.14.0",
33
33
  "requests>=2.28.0",
34
34
  "pydantic>=2.0.0",
35
35
  "typing-extensions>=4.0.0",
@@ -0,0 +1,89 @@
1
+ """Internal helpers for instant cloning (sync/async shared)."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+ from typing import BinaryIO, Union
7
+
8
# Largest accepted cloning sample, in bytes (25 * 1024 * 1024).
# Must match typecast-api `cloning_max_file_size`.
CLONING_MAX_FILE_SIZE = 25 * 1024**2

# Inclusive bounds on the length of a cloned voice's name.
NAME_MIN_LENGTH = 1
NAME_MAX_LENGTH = 30

# Model identifiers accepted for cloning.
ALLOWED_CLONE_MODELS = frozenset(("ssfm-v21", "ssfm-v30"))

# Prefix that custom (cloned) voice ids are required to start with.
CUSTOM_VOICE_ID_PREFIX = "uc_"

# Accepted forms of the audio sample: path, raw bytes, or binary file object.
AudioInput = Union[str, Path, bytes, BinaryIO]
15
+
16
+
17
def normalize_clone_model(model: object) -> str:
    """Return the string form of ``model`` after checking it is allowed for cloning.

    Args:
        model: Either a plain string or any object exposing a ``value``
            attribute (such as a ``TTSModel`` enum member), in which case
            ``.value`` is used.

    Returns:
        The resolved model identifier.

    Raises:
        ValueError: The resolved identifier is not in
            :data:`ALLOWED_CLONE_MODELS`. Raised client-side so the caller
            fails fast instead of waiting for a 422 from the API.
    """
    if hasattr(model, "value"):
        resolved = model.value
    else:
        resolved = str(model)

    if resolved in ALLOWED_CLONE_MODELS:
        return resolved

    # Sort so the error message lists the choices in a stable order.
    choices = ", ".join(sorted(ALLOWED_CLONE_MODELS))
    raise ValueError(f"model must be one of: {choices}; got {resolved!r}")
29
+
30
+
31
def validate_custom_voice_id(voice_id: str) -> None:
    """Ensure ``voice_id`` looks like a custom voice id before a DELETE is sent.

    Args:
        voice_id: Identifier to check.

    Raises:
        ValueError: ``voice_id`` is not a string, or does not start with
            :data:`CUSTOM_VOICE_ID_PREFIX`.
    """
    is_custom = isinstance(voice_id, str) and voice_id.startswith(
        CUSTOM_VOICE_ID_PREFIX
    )
    if is_custom:
        return
    raise ValueError(
        f"voice_id must start with {CUSTOM_VOICE_ID_PREFIX!r}; got {voice_id!r}"
    )
37
+
38
+
39
def _ensure_clone_audio_size(size: int) -> None:
    """Raise ``ValueError`` when ``size`` (bytes) exceeds ``CLONING_MAX_FILE_SIZE``."""
    if size > CLONING_MAX_FILE_SIZE:
        raise ValueError(f"audio file exceeds 25MB limit; got {size} bytes")


def validate_clone_inputs(audio: AudioInput, name: str) -> tuple[bytes, str]:
    """Pre-validate `clone_voice` inputs and return (audio_bytes, filename).

    Args:
        audio: One of file path (str/Path), raw bytes, or readable binary file object.
        name: Voice name (1-30 chars).

    Returns:
        (audio_bytes, filename) — filename is derived from the path/file object,
        or defaults to "audio.wav" when the caller passes raw bytes or a file
        object without a usable ``name`` attribute.

    Raises:
        ValueError: name length out of range or file too large.
        FileNotFoundError: path argument does not refer to an existing regular file.
        TypeError: audio is none of the accepted types, or a file object
            returned something other than bytes from ``read()``.
    """
    if not (NAME_MIN_LENGTH <= len(name) <= NAME_MAX_LENGTH):
        raise ValueError(
            f"name must be {NAME_MIN_LENGTH}-{NAME_MAX_LENGTH} characters; got {len(name)}"
        )

    if isinstance(audio, (str, Path)):
        path = Path(audio)
        # is_file() is already False for a missing path, so no exists() call.
        if not path.is_file():
            raise FileNotFoundError(f"audio file not found: {path}")
        # Check the on-disk size first so an oversized file is rejected
        # without being loaded into memory.
        _ensure_clone_audio_size(path.stat().st_size)
        audio_bytes = path.read_bytes()
        filename = path.name
    elif isinstance(audio, (bytes, bytearray)):
        audio_bytes = bytes(audio)
        filename = "audio.wav"
    elif hasattr(audio, "read"):
        audio_bytes = audio.read()
        if isinstance(audio_bytes, bytearray):
            audio_bytes = bytes(audio_bytes)
        if not isinstance(audio_bytes, bytes):
            raise TypeError(
                "audio file object must be opened in binary mode and return bytes"
            )
        raw_name = getattr(audio, "name", None) or "audio.wav"
        # Normalize Windows separators so basename strips any directory part.
        filename = os.path.basename(str(raw_name).replace("\\", "/"))
    else:
        raise TypeError(
            "audio must be a file path (str/Path), bytes, or readable binary file object"
        )

    # Final check on what was actually read; this covers bytes and file-object
    # inputs, and a path whose file grew between stat() and read_bytes().
    _ensure_clone_audio_size(len(audio_bytes))

    return audio_bytes, filename
@@ -1,8 +1,16 @@
1
- from typing import AsyncIterator, Optional
1
+ from pathlib import Path
2
+ from typing import AsyncIterator, BinaryIO, Optional, Union
3
+ from urllib.parse import quote
2
4
 
3
5
  import aiohttp
4
6
 
5
7
  from . import conf
8
+ from ._voice_clone import (
9
+ normalize_clone_model,
10
+ validate_clone_inputs,
11
+ validate_custom_voice_id,
12
+ )
13
+ from .client import _guess_audio_mime
6
14
  from .exceptions import (
7
15
  BadRequestError,
8
16
  InternalServerError,
@@ -14,7 +22,9 @@ from .exceptions import (
14
22
  UnprocessableEntityError,
15
23
  )
16
24
  from .models import (
25
+ CustomVoice,
17
26
  SubscriptionResponse,
27
+ TTSModel,
18
28
  TTSRequest,
19
29
  TTSRequestStream,
20
30
  TTSRequestWithTimestamps,
@@ -61,7 +71,9 @@ class AsyncTypecast:
61
71
  self.session: Optional[aiohttp.ClientSession] = None
62
72
 
63
73
  async def __aenter__(self):
64
- headers = {"Content-Type": "application/json"}
74
+ # Auth header at session scope; per-request Content-Type is set by aiohttp
75
+ # (json= auto-sets application/json, data=FormData() auto-sets multipart).
76
+ headers = {}
65
77
  if self.api_key:
66
78
  headers["X-API-KEY"] = self.api_key
67
79
  self.session = aiohttp.ClientSession(headers=headers)
@@ -212,6 +224,77 @@ class AsyncTypecast:
212
224
  data = await response.json()
213
225
  return TTSWithTimestampsResponse.model_validate(data)
214
226
 
227
async def clone_voice(
    self,
    audio: Union[str, Path, bytes, BinaryIO],
    name: str,
    model: Union[str, "TTSModel"],
) -> CustomVoice:
    """Upload an audio sample and create a quick-cloned custom voice (async).

    Args:
        audio: The sample to clone from: a file path (str/Path), raw bytes,
            or a readable binary file object. Max 25 MB.
        name: Name for the new voice, 1-30 characters.
        model: Engine model, ``"ssfm-v21"`` or ``"ssfm-v30"`` (or a
            ``TTSModel`` enum member).

    Returns:
        ``CustomVoice`` with ``voice_id`` (uc_ prefix), ``name``, and ``model``.

    Raises:
        ValueError: name length out of range, audio exceeds 25 MB, or
            ``model`` is not an allowed cloning model.
        FileNotFoundError: ``audio`` is a path to a non-existent file.
        TypeError: ``audio`` is not one of the accepted input types.
        TypecastError: client session not initialized or HTTP error.
    """
    session = self.session
    if session is None:
        raise TypecastError("Client session not initialized; use 'async with'.")

    # Validate everything locally before any network traffic happens.
    payload, upload_name = validate_clone_inputs(audio, name)
    engine = normalize_clone_model(model)

    multipart = aiohttp.FormData()
    multipart.add_field("name", name)
    multipart.add_field("model", engine)
    multipart.add_field(
        "file",
        payload,
        filename=upload_name,
        content_type=_guess_audio_mime(upload_name),
    )

    request_timeout = aiohttp.ClientTimeout(total=300, connect=10)
    endpoint = f"{self.host}/v1/voices/clone"
    async with session.post(
        endpoint, data=multipart, timeout=request_timeout
    ) as response:
        if response.status != 200:
            self._handle_error(response.status, await response.text())
        return CustomVoice.model_validate(await response.json())
275
+
276
async def delete_voice(self, voice_id: str) -> None:
    """Soft-delete a custom voice (async).

    Args:
        voice_id: Voice identifier with ``uc_`` prefix.

    Raises:
        ValueError: ``voice_id`` does not carry the custom-voice prefix.
        TypecastError: client session not initialized, or (as a subclass)
            per HTTP status from the API.
    """
    session = self.session
    if session is None:
        raise TypecastError("Client session not initialized; use 'async with'.")

    validate_custom_voice_id(voice_id)

    # Percent-encode every reserved character so the id stays one path segment.
    endpoint = f"{self.host}/v1/voices/{quote(voice_id, safe='')}"
    request_timeout = aiohttp.ClientTimeout(total=60, connect=10)
    async with session.delete(endpoint, timeout=request_timeout) as response:
        if response.status in (200, 204):
            return
        self._handle_error(response.status, await response.text())
297
+
215
298
  async def voices(self, model: Optional[str] = None) -> list[VoicesResponse]:
216
299
  """Get available voices (V1 API) asynchronously.
217
300
 
@@ -1,8 +1,15 @@
1
- from typing import Iterator, Optional
1
+ from pathlib import Path
2
+ from typing import BinaryIO, Iterator, Optional, Union
3
+ from urllib.parse import quote
2
4
 
3
5
  import requests
4
6
 
5
7
  from . import conf
8
+ from ._voice_clone import (
9
+ normalize_clone_model,
10
+ validate_clone_inputs,
11
+ validate_custom_voice_id,
12
+ )
6
13
  from .exceptions import (
7
14
  BadRequestError,
8
15
  InternalServerError,
@@ -14,7 +21,9 @@ from .exceptions import (
14
21
  UnprocessableEntityError,
15
22
  )
16
23
  from .models import (
24
+ CustomVoice,
17
25
  SubscriptionResponse,
26
+ TTSModel,
18
27
  TTSRequest,
19
28
  TTSRequestStream,
20
29
  TTSRequestWithTimestamps,
@@ -26,6 +35,16 @@ from .models import (
26
35
  )
27
36
 
28
37
 
38
+ def _guess_audio_mime(filename: str) -> str:
39
+ """Guess audio MIME type from filename extension; fall back to octet-stream."""
40
+ lower = filename.lower()
41
+ if lower.endswith(".wav"):
42
+ return "audio/wav"
43
+ if lower.endswith(".mp3"):
44
+ return "audio/mpeg"
45
+ return "application/octet-stream"
46
+
47
+
29
48
  class Typecast:
30
49
  """Synchronous client for the Typecast Text-to-Speech API.
31
50
 
@@ -202,6 +221,67 @@ class Typecast:
202
221
  self._handle_error(response.status_code, response.text)
203
222
  return TTSWithTimestampsResponse.model_validate(response.json())
204
223
 
224
def clone_voice(
    self,
    audio: Union[str, Path, bytes, BinaryIO],
    name: str,
    model: Union[str, "TTSModel"],
) -> CustomVoice:
    """Upload an audio sample and create a quick-cloned custom voice.

    Args:
        audio: The sample to clone from: a file path (str/Path), raw bytes,
            or a readable binary file object. Max 25 MB.
        name: Name for the new voice, 1-30 characters.
        model: Engine model, ``"ssfm-v21"`` or ``"ssfm-v30"`` (or a
            ``TTSModel`` enum member).

    Returns:
        ``CustomVoice`` with ``voice_id`` (uc_ prefix), ``name``, and ``model``.
        Use ``voice_id`` directly with ``text_to_speech`` to synthesize.

    Raises:
        ValueError: name length out of range, audio exceeds 25 MB, or
            ``model`` is not an allowed cloning model.
        FileNotFoundError: ``audio`` is a path to a non-existent file.
        TypeError: ``audio`` is not one of the accepted input types.
        TypecastError subclasses: per HTTP status from the API.
    """
    # Validate everything locally before any network traffic happens.
    payload, upload_name = validate_clone_inputs(audio, name)
    engine = normalize_clone_model(model)

    file_part = (upload_name, payload, _guess_audio_mime(upload_name))
    form_fields = {"name": name, "model": engine}

    response = self.session.post(
        f"{self.host}/v1/voices/clone",
        files={"file": file_part},
        data=form_fields,
        # A None value drops the session-level Content-Type for this request,
        # letting requests generate the multipart/form-data boundary itself.
        headers={"Content-Type": None},
        timeout=(10, 300),
    )
    if response.status_code != 200:
        self._handle_error(response.status_code, response.text)
    return CustomVoice.model_validate(response.json())
266
+
267
def delete_voice(self, voice_id: str) -> None:
    """Soft-delete a custom voice.

    Args:
        voice_id: Voice identifier with ``uc_`` prefix (returned by ``clone_voice``).

    Raises:
        ValueError: ``voice_id`` does not carry the custom-voice prefix.
        TypecastError subclasses: per HTTP status from the API
            (e.g., ``NotFoundError`` if the voice doesn't exist or isn't owned).
    """
    validate_custom_voice_id(voice_id)

    # Percent-encode every reserved character so the id stays one path segment.
    endpoint = f"{self.host}/v1/voices/{quote(voice_id, safe='')}"
    response = self.session.delete(endpoint, timeout=(10, 60))
    if response.status_code in (200, 204):
        return
    self._handle_error(response.status_code, response.text)
284
+
205
285
  def voices(self, model: Optional[str] = None) -> list[VoicesResponse]:
206
286
  """Get available voices (V1 API).
207
287
 
@@ -20,6 +20,7 @@ from .tts import (
20
20
  )
21
21
  from .voices import (
22
22
  AgeEnum,
23
+ CustomVoice,
23
24
  GenderEnum,
24
25
  ModelInfo,
25
26
  UseCaseEnum,
@@ -33,6 +34,7 @@ __all__ = [
33
34
  "AlignmentSegmentCharacter",
34
35
  "AlignmentSegmentWord",
35
36
  "Credits",
37
+ "CustomVoice",
36
38
  "EmotionPreset",
37
39
  "Error",
38
40
  "GenderEnum",
@@ -1,7 +1,7 @@
1
1
  from enum import Enum
2
2
  from typing import Optional
3
3
 
4
- from pydantic import BaseModel
4
+ from pydantic import BaseModel, Field
5
5
 
6
6
  from .tts import TTSModel
7
7
 
@@ -75,3 +75,18 @@ class VoicesV2Filter(BaseModel):
75
75
  gender: Optional[GenderEnum] = None
76
76
  age: Optional[AgeEnum] = None
77
77
  use_cases: Optional[UseCaseEnum] = None
78
+
79
+
80
class CustomVoice(BaseModel):
    """Response model for a quick-cloned voice (`POST /v1/voices/clone`).

    All three fields are required.

    Attributes:
        voice_id: Identifier of the custom voice; carries the `uc_` prefix.
            Pass it as `voice_id` to `text_to_speech` /
            `text_to_speech_with_timestamps`.
        name: Human-readable name (1-30 chars).
        model: Engine model the voice was cloned for (`ssfm-v21` or `ssfm-v30`).
    """

    # No default is supplied, so pydantic v2 treats each field as required.
    voice_id: str = Field(description="Custom voice identifier (uc_ prefix)")
    name: str = Field(description="Human-readable voice name")
    model: str = Field(description="Engine model: ssfm-v21 or ssfm-v30")
File without changes