soniox 2.6.0__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {soniox-2.6.0 → soniox-2.7.0}/CHANGELOG.md +24 -0
- {soniox-2.6.0 → soniox-2.7.0}/PKG-INFO +1 -1
- {soniox-2.6.0 → soniox-2.7.0}/docs/async_client.md +16 -8
- {soniox-2.6.0 → soniox-2.7.0}/docs/types.md +33 -15
- {soniox-2.6.0 → soniox-2.7.0}/examples/async_soniox_client/tts_realtime_example.py +1 -3
- {soniox-2.6.0 → soniox-2.7.0}/examples/async_soniox_client/tts_realtime_multiplexed_example.py +3 -8
- {soniox-2.6.0 → soniox-2.7.0}/examples/soniox_client/tts_realtime_multiplexed_example.py +3 -10
- {soniox-2.6.0 → soniox-2.7.0}/pyproject.toml +1 -1
- {soniox-2.6.0 → soniox-2.7.0}/scripts/generate_docs.py +19 -5
- soniox-2.7.0/src/soniox/api/_utils.py +227 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_stt.py +0 -1
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_tts.py +27 -23
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/stt.py +0 -1
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/tts.py +28 -24
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/async_tts.py +2 -12
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/tts.py +2 -12
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/types/api.py +61 -25
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/types/realtime.py +6 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/_sdk_bindings.py +0 -1
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_async_tts_workflows.py +9 -7
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_translate.py +31 -3
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_tts_workflows.py +72 -8
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_types.py +11 -0
- {soniox-2.6.0 → soniox-2.7.0}/uv.lock +1 -1
- soniox-2.6.0/src/soniox/api/_utils.py +0 -118
- {soniox-2.6.0 → soniox-2.7.0}/.claude/settings.local.json +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/.gitignore +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/CODE_OF_CONDUCT.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/CONTRIBUTING.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/LICENSE +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/README.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/SECURITY.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/assets/coffee_shop.mp3 +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/assets/coffee_shop.pcm_s16le +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/assets/two_way_translation.mp3 +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/assets/two_way_translation.pcm_s16le +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/docs/realtime_client.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/docs/utils.md +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/async_soniox_client/api_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/async_soniox_client/realtime_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/async_soniox_client/tts_api_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/soniox_client/api_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/soniox_client/realtime_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/soniox_client/tts_api_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/examples/soniox_client/tts_realtime_example.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/justfile +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_auth.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_concurrency_limits.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_files.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_models.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_tts_models.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_usage_logs.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/async_webhooks.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/auth.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/concurrency_limits.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/files.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/models.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/tts_models.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/usage_logs.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/api/webhooks.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/client.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/errors.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/py.typed +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/_constants.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/_utils.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/async_stt.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/realtime/stt.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/types/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/types/common.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/types/webhooks.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/src/soniox/utils.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/conftest.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/data/openapi.json +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/helpers.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/cases.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/mock_ws.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_async_tts_realtime.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_chaos_realtime.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_fuzz_realtime.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_keepalive.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_live_ws.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_stt_realtime.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/realtime/test_tts_realtime.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/__init__.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/_openapi.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_api.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_async_client.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_async_stt_workflows.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_client.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_concurrency_limits.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_coverage_gaps.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_schema_drift.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_stt_extras.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_stt_workflows.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_sync_async_parity.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_usage_logs.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_utils.py +0 -0
- {soniox-2.6.0 → soniox-2.7.0}/tests/unit/test_webhooks.py +0 -0
|
@@ -73,6 +73,30 @@ Examples:
|
|
|
73
73
|
|
|
74
74
|
---
|
|
75
75
|
|
|
76
|
+
## [2.7.0] - 24 jun 2026
|
|
77
|
+
|
|
78
|
+
### Added
|
|
79
|
+
|
|
80
|
+
- `endpoint_latency_adjustment_level` field on `RealtimeSTTConfig` (integer 0–3) to fine-tune the latency/accuracy trade-off of realtime endpoint detection.
|
|
81
|
+
|
|
82
|
+
### Changed
|
|
83
|
+
|
|
84
|
+
- TTS REST output settings (`audio_format`, `sample_rate`, `bitrate`) now live on `CreateTtsConfig`; `generate()` and `generate_to_file()` take the utterance's `text`, `voice`, `model`, and `language` directly. Each field now has a single home (no more flat-vs-config overlap). Existing flat-keyword calls keep working (see Deprecated).
|
|
85
|
+
- During the deprecation overlap, when a deprecated field is set both on the config and as a flat argument, the config value now takes precedence uniformly across STT and TTS (previously `client_reference_id` resolved the other way).
|
|
86
|
+
|
|
87
|
+
### Deprecated
|
|
88
|
+
|
|
89
|
+
- Passing `audio_format`, `sample_rate`, or `bitrate` as keyword arguments to TTS `generate()` / `generate_to_file()` is deprecated; set them on `CreateTtsConfig` instead. The keyword arguments still work but emit a `DeprecationWarning` and will be removed in a future major release.
|
|
90
|
+
- Setting `model`, `voice`, or `language` on `CreateTtsConfig` is deprecated; pass them directly to `generate()` / `generate_to_file()` (they describe the utterance, not output encoding).
|
|
91
|
+
- Relying on the default TTS `language` (`"en"`) is deprecated; pass `language` explicitly to `generate()` / `generate_to_file()`. It will become a required argument in the next major release.
|
|
92
|
+
- Setting `model` or `client_reference_id` on `CreateTranscriptionConfig` is deprecated; pass them directly to the transcription `create*` calls.
|
|
93
|
+
|
|
94
|
+
### Removed
|
|
95
|
+
|
|
96
|
+
- The internal module constants `DEFAULT_LANGUAGE` and `DEFAULT_AUDIO_FORMAT` in `soniox.api.tts` / `soniox.api.async_tts`. The defaults (`"en"` / `"wav"`) are now applied inside payload construction. Behavior is unchanged.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
76
100
|
## [2.6.0] - 15 jun 2026
|
|
77
101
|
|
|
78
102
|
### Added
|
|
@@ -1245,13 +1245,17 @@ AsyncTtsAPI(client: AsyncSonioxClient)
|
|
|
1245
1245
|
### generate()
|
|
1246
1246
|
|
|
1247
1247
|
```python
|
|
1248
|
-
generate(*, text: str, voice: str, model: str = DEFAULT_MODEL,
|
|
1248
|
+
generate(*, text: str, voice: str, model: str = DEFAULT_MODEL, config: CreateTtsConfig | None = None, language: str | None = None, audio_format: TtsAudioFormat | None = None, sample_rate: TtsSampleRate | None = None, bitrate: TtsBitrate | None = None) -> bytes
|
|
1249
1249
|
```
|
|
1250
1250
|
|
|
1251
1251
|
Generate speech audio from text and return raw audio bytes.
|
|
1252
1252
|
|
|
1253
1253
|
Performs a POST request to the TTS REST endpoint.
|
|
1254
1254
|
|
|
1255
|
+
``audio_format``/``sample_rate``/``bitrate`` are deprecated; set them on
|
|
1256
|
+
``CreateTtsConfig`` instead. Pass ``language`` explicitly — relying on the default
|
|
1257
|
+
("en") is deprecated and ``language`` will be required in the next major release.
|
|
1258
|
+
|
|
1255
1259
|
**Parameters**
|
|
1256
1260
|
|
|
1257
1261
|
| Parameter | Type | Description |
|
|
@@ -1259,11 +1263,11 @@ Performs a POST request to the TTS REST endpoint.
|
|
|
1259
1263
|
| `text` | `str` | Text to synthesize into speech. |
|
|
1260
1264
|
| `voice` | `str` | Voice identifier to generate speech audio with. |
|
|
1261
1265
|
| `model` | `str` | Text-to-speech model to use. |
|
|
1262
|
-
| `
|
|
1263
|
-
| `
|
|
1266
|
+
| `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
|
|
1267
|
+
| `language` | `str \| None` | Language code for Text-to-Speech (e.g., "en"). |
|
|
1268
|
+
| `audio_format` | `TtsAudioFormat \| None` | Requested output audio format. Deprecated as a keyword argument; set it on `CreateTtsConfig` instead. |
|
|
1264
1269
|
| `sample_rate` | `TtsSampleRate \| None` | Audio sample rate in Hz. |
|
|
1265
1270
|
| `bitrate` | `TtsBitrate \| None` | Output bitrate in bits-per-second for compressed formats. |
|
|
1266
|
-
| `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
|
|
1267
1271
|
|
|
1268
1272
|
**Returns**
|
|
1269
1273
|
|
|
@@ -1280,11 +1284,15 @@ Performs a POST request to the TTS REST endpoint.
|
|
|
1280
1284
|
### generate_to_file()
|
|
1281
1285
|
|
|
1282
1286
|
```python
|
|
1283
|
-
generate_to_file(output: BinaryIO | Path | str, *, text: str, voice: str = DEFAULT_VOICE, model: str = DEFAULT_MODEL,
|
|
1287
|
+
generate_to_file(output: BinaryIO | Path | str, *, text: str, voice: str = DEFAULT_VOICE, model: str = DEFAULT_MODEL, config: CreateTtsConfig | None = None, language: str | None = None, audio_format: TtsAudioFormat | None = None, sample_rate: TtsSampleRate | None = None, bitrate: TtsBitrate | None = None) -> int
|
|
1284
1288
|
```
|
|
1285
1289
|
|
|
1286
1290
|
Generate speech audio from text and write the audio bytes to a file-like output.
|
|
1287
1291
|
|
|
1292
|
+
``audio_format``/``sample_rate``/``bitrate`` are deprecated; set them on
|
|
1293
|
+
``CreateTtsConfig`` instead. Pass ``language`` explicitly — relying on the default
|
|
1294
|
+
("en") is deprecated and ``language`` will be required in the next major release.
|
|
1295
|
+
|
|
1288
1296
|
**Parameters**
|
|
1289
1297
|
|
|
1290
1298
|
| Parameter | Type | Description |
|
|
@@ -1293,11 +1301,11 @@ Generate speech audio from text and write the audio bytes to a file-like output.
|
|
|
1293
1301
|
| `text` | `str` | Text to synthesize into speech. |
|
|
1294
1302
|
| `voice` | `str` | Voice identifier to generate speech audio with. |
|
|
1295
1303
|
| `model` | `str` | Text-to-speech model to use. |
|
|
1296
|
-
| `
|
|
1297
|
-
| `
|
|
1304
|
+
| `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
|
|
1305
|
+
| `language` | `str \| None` | Language code for Text-to-Speech (e.g., "en"). |
|
|
1306
|
+
| `audio_format` | `TtsAudioFormat \| None` | Requested output audio format. Deprecated as a keyword argument; set it on `CreateTtsConfig` instead. |
|
|
1298
1307
|
| `sample_rate` | `TtsSampleRate \| None` | Audio sample rate in Hz. |
|
|
1299
1308
|
| `bitrate` | `TtsBitrate \| None` | Output bitrate in bits-per-second for compressed formats. |
|
|
1300
|
-
| `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
|
|
1301
1309
|
|
|
1302
1310
|
**Returns**
|
|
1303
1311
|
|
|
@@ -171,9 +171,9 @@ Helper config used when building Text-to-Speech payloads.
|
|
|
171
171
|
|
|
172
172
|
| Property | Type | Description |
|
|
173
173
|
| ------ | ------ | ------ |
|
|
174
|
-
| `model` | `str \| None` |
|
|
175
|
-
| `language` | `str \| None` |
|
|
176
|
-
| `voice` | `str \| None` |
|
|
174
|
+
| `model` | `str \| None` | Deprecated: pass ``model`` to generate()/generate_to_file() instead. |
|
|
175
|
+
| `language` | `str \| None` | Deprecated: pass ``language`` to generate()/generate_to_file() instead. |
|
|
176
|
+
| `voice` | `str \| None` | Deprecated: pass ``voice`` to generate()/generate_to_file() instead. |
|
|
177
177
|
| `audio_format` | `TtsAudioFormat \| None` | Requested output audio format. |
|
|
178
178
|
| `sample_rate` | `TtsSampleRate \| None` | Output sample rate in Hz. |
|
|
179
179
|
| `bitrate` | `TtsBitrate \| None` | Output bitrate in bits-per-second for compressed formats. |
|
|
@@ -216,7 +216,7 @@ Helper config used when building transcription payloads.
|
|
|
216
216
|
|
|
217
217
|
| Property | Type | Description |
|
|
218
218
|
| ------ | ------ | ------ |
|
|
219
|
-
| `model` | `str \| None` |
|
|
219
|
+
| `model` | `str \| None` | Deprecated: pass ``model`` to the create call instead. |
|
|
220
220
|
| `language_hints` | `list[LanguageCode] \| None` | Array of expected ISO language codes to bias recognition. |
|
|
221
221
|
| `language_hints_strict` | `bool \| None` | When true, model relies more heavily on language hints. |
|
|
222
222
|
| `enable_speaker_diarization` | `bool \| None` | Enable speaker diarization to identify different speakers. |
|
|
@@ -226,7 +226,7 @@ Helper config used when building transcription payloads.
|
|
|
226
226
|
| `webhook_url` | `str \| None` | URL to receive webhook notifications when transcription is completed or fails. |
|
|
227
227
|
| `webhook_auth_header_name` | `str \| None` | Name of the authentication header sent with webhook notifications |
|
|
228
228
|
| `webhook_auth_header_value` | `str \| None` | Authentication header value sent with webhook notifications |
|
|
229
|
-
| `client_reference_id` | `str \| None` |
|
|
229
|
+
| `client_reference_id` | `str \| None` | Deprecated: pass ``client_reference_id`` to the create call instead. |
|
|
230
230
|
|
|
231
231
|
---
|
|
232
232
|
|
|
@@ -502,7 +502,15 @@ Audio formats accepted by the realtime STT websocket.
|
|
|
502
502
|
|
|
503
503
|
```python
|
|
504
504
|
RealtimeSTTHeaderFormat = Literal[
|
|
505
|
-
"aac",
|
|
505
|
+
"aac",
|
|
506
|
+
"aiff",
|
|
507
|
+
"amr",
|
|
508
|
+
"asf",
|
|
509
|
+
"flac",
|
|
510
|
+
"mp3",
|
|
511
|
+
"ogg",
|
|
512
|
+
"wav",
|
|
513
|
+
"webm",
|
|
506
514
|
]
|
|
507
515
|
```
|
|
508
516
|
|
|
@@ -517,16 +525,25 @@ Container formats whose header carries sample rate and channels.
|
|
|
517
525
|
```python
|
|
518
526
|
RealtimeSTTRawFormat = Literal[
|
|
519
527
|
"pcm_s8",
|
|
520
|
-
"pcm_s16le",
|
|
521
|
-
"
|
|
522
|
-
"
|
|
528
|
+
"pcm_s16le",
|
|
529
|
+
"pcm_s16be",
|
|
530
|
+
"pcm_s24le",
|
|
531
|
+
"pcm_s24be",
|
|
532
|
+
"pcm_s32le",
|
|
533
|
+
"pcm_s32be",
|
|
523
534
|
"pcm_u8",
|
|
524
|
-
"pcm_u16le",
|
|
525
|
-
"
|
|
526
|
-
"
|
|
527
|
-
"
|
|
528
|
-
"
|
|
529
|
-
"
|
|
535
|
+
"pcm_u16le",
|
|
536
|
+
"pcm_u16be",
|
|
537
|
+
"pcm_u24le",
|
|
538
|
+
"pcm_u24be",
|
|
539
|
+
"pcm_u32le",
|
|
540
|
+
"pcm_u32be",
|
|
541
|
+
"pcm_f32le",
|
|
542
|
+
"pcm_f32be",
|
|
543
|
+
"pcm_f64le",
|
|
544
|
+
"pcm_f64be",
|
|
545
|
+
"mulaw",
|
|
546
|
+
"alaw",
|
|
530
547
|
]
|
|
531
548
|
```
|
|
532
549
|
|
|
@@ -993,6 +1010,7 @@ Configuration for initiating a realtime transcription session.
|
|
|
993
1010
|
| `enable_endpoint_detection` | `bool \| None` | Enable endpoint detection for utterance boundaries. |
|
|
994
1011
|
| `max_endpoint_delay_ms` | `int \| None` | Maximum delay between the end of speech and returned endpoint. Allowed values for maximum delay are between 500ms and 3000ms. The default value is 2000ms |
|
|
995
1012
|
| `endpoint_sensitivity` | `float \| None` | Adjusts how likely the model is to emit an endpoint. Higher values make endpoints more likely (finalizing sooner); lower values make them less likely. Allowed values are between -1.0 and 1.0; the default is 0.0. Introduced in the Soniox v5 model; earlier models reject it. |
|
|
1013
|
+
| `endpoint_latency_adjustment_level` | `int \| None` | Fine-tunes the latency/accuracy trade-off of endpoint detection. Allowed values are integers from 0 to 3. |
|
|
996
1014
|
| `translation` | `TranslationConfigInput \| None` | Translation configuration. |
|
|
997
1015
|
| `client_reference_id` | `str \| None` | Optional tracking identifier (max 256 chars). |
|
|
998
1016
|
|
|
@@ -40,9 +40,7 @@ async def main() -> None:
|
|
|
40
40
|
try:
|
|
41
41
|
async with client.realtime.tts.connect(config=config) as connection:
|
|
42
42
|
send_task = asyncio.create_task(
|
|
43
|
-
connection.send_text_chunks(
|
|
44
|
-
_iter_text_chunks(TEXT_CHUNKS), text_end=True
|
|
45
|
-
),
|
|
43
|
+
connection.send_text_chunks(_iter_text_chunks(TEXT_CHUNKS), text_end=True),
|
|
46
44
|
name="tts-async-sender",
|
|
47
45
|
)
|
|
48
46
|
try:
|
{soniox-2.6.0 → soniox-2.7.0}/examples/async_soniox_client/tts_realtime_multiplexed_example.py
RENAMED
|
@@ -69,9 +69,7 @@ def write_outputs(audio_by_stream: dict[str, bytes]) -> None:
|
|
|
69
69
|
)
|
|
70
70
|
if audio:
|
|
71
71
|
output_path.write_bytes(audio)
|
|
72
|
-
print(
|
|
73
|
-
f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}"
|
|
74
|
-
)
|
|
72
|
+
print(f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}")
|
|
75
73
|
else:
|
|
76
74
|
print(f"No audio file was written for stream {key.upper()}.")
|
|
77
75
|
|
|
@@ -96,8 +94,7 @@ async def main() -> None:
|
|
|
96
94
|
try:
|
|
97
95
|
async with client.realtime.tts.connect_multi_stream() as connection:
|
|
98
96
|
streams = {
|
|
99
|
-
key: await connection.open_stream(config=configs[key])
|
|
100
|
-
for key in sorted(configs)
|
|
97
|
+
key: await connection.open_stream(config=configs[key]) for key in sorted(configs)
|
|
101
98
|
}
|
|
102
99
|
|
|
103
100
|
receiver_tasks = [
|
|
@@ -124,9 +121,7 @@ async def main() -> None:
|
|
|
124
121
|
if isinstance(result, BaseException):
|
|
125
122
|
errors.append(result)
|
|
126
123
|
|
|
127
|
-
receiver_results = await asyncio.gather(
|
|
128
|
-
*receiver_tasks, return_exceptions=True
|
|
129
|
-
)
|
|
124
|
+
receiver_results = await asyncio.gather(*receiver_tasks, return_exceptions=True)
|
|
130
125
|
for key, result in zip(streams.keys(), receiver_results, strict=True):
|
|
131
126
|
if isinstance(result, BaseException):
|
|
132
127
|
errors.append(result)
|
|
@@ -74,9 +74,7 @@ def write_outputs(audio_by_stream: dict[str, bytes]) -> None:
|
|
|
74
74
|
)
|
|
75
75
|
if audio:
|
|
76
76
|
output_path.write_bytes(audio)
|
|
77
|
-
print(
|
|
78
|
-
f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}"
|
|
79
|
-
)
|
|
77
|
+
print(f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}")
|
|
80
78
|
else:
|
|
81
79
|
print(f"No audio file was written for stream {key.upper()}.")
|
|
82
80
|
|
|
@@ -101,10 +99,7 @@ def main() -> None:
|
|
|
101
99
|
|
|
102
100
|
try:
|
|
103
101
|
with client.realtime.tts.connect_multi_stream() as connection:
|
|
104
|
-
streams = {
|
|
105
|
-
key: connection.open_stream(config=configs[key])
|
|
106
|
-
for key in sorted(configs)
|
|
107
|
-
}
|
|
102
|
+
streams = {key: connection.open_stream(config=configs[key]) for key in sorted(configs)}
|
|
108
103
|
|
|
109
104
|
receiver_threads = [
|
|
110
105
|
threading.Thread(
|
|
@@ -135,9 +130,7 @@ def main() -> None:
|
|
|
135
130
|
|
|
136
131
|
with errors_lock:
|
|
137
132
|
for exc in errors:
|
|
138
|
-
print(
|
|
139
|
-
"Realtime multiplexed TTS error (keeping partial audio):", exc
|
|
140
|
-
)
|
|
133
|
+
print("Realtime multiplexed TTS error (keeping partial audio):", exc)
|
|
141
134
|
finally:
|
|
142
135
|
write_outputs(audio_by_stream)
|
|
143
136
|
client.close()
|
|
@@ -281,7 +281,9 @@ def parse_raises_entries(lines: list[str]) -> list[tuple[str, str]]:
|
|
|
281
281
|
match = re.match(r"^\s*([A-Za-z_][\w\.\[\], ]*)\s*:\s*(.*)$", line)
|
|
282
282
|
if match:
|
|
283
283
|
if current_name is not None:
|
|
284
|
-
entries.append(
|
|
284
|
+
entries.append(
|
|
285
|
+
(current_name.strip(), clean_paragraph_block("\n".join(current_desc)))
|
|
286
|
+
)
|
|
285
287
|
current_name = match.group(1)
|
|
286
288
|
current_desc = [match.group(2).strip()]
|
|
287
289
|
elif current_name is not None:
|
|
@@ -343,7 +345,11 @@ def get_parsed_doc(obj: Object) -> ParsedDoc:
|
|
|
343
345
|
|
|
344
346
|
|
|
345
347
|
def extract_name_target(node: ast.AST) -> str | None:
|
|
346
|
-
if
|
|
348
|
+
if (
|
|
349
|
+
isinstance(node, ast.Assign)
|
|
350
|
+
and len(node.targets) == 1
|
|
351
|
+
and isinstance(node.targets[0], ast.Name)
|
|
352
|
+
):
|
|
347
353
|
return node.targets[0].id
|
|
348
354
|
if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
|
|
349
355
|
return node.target.id
|
|
@@ -434,7 +440,13 @@ def parse_dunder_all(path: Path) -> list[str]:
|
|
|
434
440
|
name = extract_name_target(stmt)
|
|
435
441
|
if name != "__all__":
|
|
436
442
|
continue
|
|
437
|
-
value =
|
|
443
|
+
value = (
|
|
444
|
+
stmt.value
|
|
445
|
+
if isinstance(stmt, ast.Assign)
|
|
446
|
+
else stmt.value
|
|
447
|
+
if isinstance(stmt, ast.AnnAssign)
|
|
448
|
+
else None
|
|
449
|
+
)
|
|
438
450
|
if not isinstance(value, (ast.List, ast.Tuple)):
|
|
439
451
|
continue
|
|
440
452
|
exports: list[str] = []
|
|
@@ -593,7 +605,9 @@ def get_call_name(call: ast.Call) -> str | None:
|
|
|
593
605
|
return None
|
|
594
606
|
|
|
595
607
|
|
|
596
|
-
def iter_function_nodes(
|
|
608
|
+
def iter_function_nodes(
|
|
609
|
+
tree: ast.Module,
|
|
610
|
+
) -> list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]]:
|
|
597
611
|
nodes: list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]] = []
|
|
598
612
|
for stmt in tree.body:
|
|
599
613
|
if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
@@ -695,7 +709,7 @@ def format_constructor_signature(cls: Class, constructor: Function) -> str:
|
|
|
695
709
|
sig_text = str(constructor.signature())
|
|
696
710
|
match = re.match(r"^__init__\((.*)\)\s*->\s*None$", sig_text)
|
|
697
711
|
if not match:
|
|
698
|
-
return f"{cls.name}{sig_text[sig_text.find('('):]}"
|
|
712
|
+
return f"{cls.name}{sig_text[sig_text.find('(') :]}"
|
|
699
713
|
params = match.group(1).strip()
|
|
700
714
|
if params.startswith("self, "):
|
|
701
715
|
params = params[len("self, ") :]
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import warnings
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import BinaryIO, TypeVar
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
from ..errors import SonioxAPIError, SonioxValidationError
|
|
12
|
+
from ..types import (
|
|
13
|
+
CreateTranscriptionConfig,
|
|
14
|
+
CreateTranscriptionPayload,
|
|
15
|
+
CreateTtsConfig,
|
|
16
|
+
CreateTtsPayload,
|
|
17
|
+
LanguageCode,
|
|
18
|
+
TranslationConfig,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
ModelT = TypeVar("ModelT", bound=BaseModel)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _warn_deprecated_config_fields(
    config: BaseModel | None, names: tuple[str, ...], advice: str
) -> None:
    """Emit a DeprecationWarning if any of ``names`` was explicitly set on ``config``.

    Fields are detected through ``model_fields_set`` (not attribute access), so the
    warning never fires for unset fields and never double-warns with pydantic's own
    ``deprecated=`` hook.

    Args:
        config: Config model to inspect. ``None`` is a no-op.
        names: Deprecated field names to look for; they are reported in this order.
        advice: Replacement guidance spliced into the warning message.
    """
    if config is None:
        return
    used = [n for n in names if n in config.model_fields_set]
    if used:
        warnings.warn(
            f"Setting {', '.join(used)} on {type(config).__name__} is deprecated; "
            f"{advice}. This will be removed in the next major release.",
            DeprecationWarning,
            # Frame 1 is this helper, 2 the build_*_payload function, 3 the public
            # API method, 4 the user's call site. The inline warnings in
            # build_tts_payload use stacklevel=3 from one frame higher, so this
            # helper needs 4 to attribute the warning to the same place.
            # NOTE(review): assumes the payload builders are called directly by the
            # public API methods - confirm against the callers.
            stacklevel=4,
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def ensure_success(response: httpx.Response) -> None:
    """Raise ``SonioxAPIError`` built from ``response`` when ``response.is_error`` is set.

    Returns ``None`` and does nothing else when the response is not an error.
    """
    if not response.is_error:
        return
    raise SonioxAPIError.from_response(response)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def parse_response(response: httpx.Response, model: type[ModelT]) -> ModelT:
    """Validate the JSON body of ``response`` into an instance of ``model``.

    ``ensure_success`` runs first, so an error response raises before the body
    is decoded.
    """
    ensure_success(response)
    return model.model_validate(response.json())
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
async def parse_async_response(response: httpx.Response, model: type[ModelT]) -> ModelT:
    """Coroutine twin of ``parse_response`` for the async client.

    Nothing is awaited here: the success check, JSON decoding and model
    validation are the same synchronous steps, so they are delegated.
    """
    return parse_response(response, model)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def normalize_file(
|
|
62
|
+
file: BinaryIO | bytes | Path | str,
|
|
63
|
+
filename: str | None = None,
|
|
64
|
+
) -> tuple[BinaryIO, str, bool]:
|
|
65
|
+
"""Return (file-like, filename, should_close) tuple for upload."""
|
|
66
|
+
if isinstance(file, bytes | bytearray):
|
|
67
|
+
file_obj = io.BytesIO(file)
|
|
68
|
+
effective_name = filename or "upload.bin"
|
|
69
|
+
return file_obj, effective_name, True
|
|
70
|
+
|
|
71
|
+
if isinstance(file, Path):
|
|
72
|
+
file_obj = file.open("rb")
|
|
73
|
+
effective_name = filename or file.name
|
|
74
|
+
return file_obj, effective_name, True
|
|
75
|
+
|
|
76
|
+
if isinstance(file, str):
|
|
77
|
+
return normalize_file(Path(file), filename=filename)
|
|
78
|
+
|
|
79
|
+
if isinstance(file, io.IOBase):
|
|
80
|
+
effective_name = filename or getattr(file, "name", "upload.bin")
|
|
81
|
+
return file, effective_name, False
|
|
82
|
+
|
|
83
|
+
raise TypeError("file must be bytes, Path, or file-like stream.")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def build_create_payload(
    *,
    model: str,
    file_id: str | None,
    audio_url: str | None,
    client_reference_id: str | None,
    config: CreateTranscriptionConfig | None,
) -> CreateTranscriptionPayload:
    """Assemble and validate the payload for a create-transcription request.

    ``model`` and ``client_reference_id`` set on ``config`` are deprecated: they
    trigger a ``DeprecationWarning`` but, when not ``None``, still take precedence
    over the flat arguments. Every other non-``None`` config field is forwarded
    to the payload unchanged.
    """
    _warn_deprecated_config_fields(
        config,
        ("model", "client_reference_id"),
        "pass it directly to the create call instead",
    )

    extras: dict = {}
    if config:
        extras = config.model_dump(exclude_none=True)

    # Pull the deprecated overrides out so they are not merged in a second time.
    config_model = extras.pop("model", None)
    config_reference = extras.pop("client_reference_id", None)
    if config_model is not None:
        model = config_model
    if config_reference is not None:
        client_reference_id = config_reference

    payload = {
        "model": model,
        "file_id": file_id,
        "audio_url": audio_url,
        "client_reference_id": client_reference_id,
    }
    payload.update(extras)
    return CreateTranscriptionPayload.model_validate(payload)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def build_tts_payload(
    *,
    text: str,
    voice: str,
    model: str,
    config: CreateTtsConfig | None,
    language: str | None = None,
    audio_format: str | None = None,
    sample_rate: int | None = None,
    bitrate: int | None = None,
) -> CreateTtsPayload:
    """Build and validate a TTS payload from utterance arguments plus output settings.

    ``text``, ``voice``, ``model`` and ``language`` describe the utterance and are
    passed flat; ``audio_format``, ``sample_rate`` and ``bitrate`` are output
    settings that belong on ``config``.

    Three deprecated paths are still honored, each with a ``DeprecationWarning``:
    the flat ``audio_format``/``sample_rate``/``bitrate`` keyword arguments (the
    config value wins when both are given); ``model``/``voice``/``language`` set
    on ``config``; and omitting ``language`` altogether, which falls back to
    ``"en"``. ``audio_format`` defaults to ``"wav"`` when set nowhere.
    """
    # NOTE: deprecation shim. In the next major release: make `language` required
    # (drop its default and the omitted-language warning), delete the flat
    # audio_format/sample_rate/bitrate kwargs and their warning, and drop
    # model/voice/language from CreateTtsConfig together with the
    # _warn_deprecated_config_fields call and the override handling below.
    # Keep _warn_deprecated_config_fields and `import warnings` until the STT
    # shim is removed as well.
    flat_settings: dict[str, object] = {}
    for key, value in (
        ("audio_format", audio_format),
        ("sample_rate", sample_rate),
        ("bitrate", bitrate),
    ):
        if value is not None:
            flat_settings[key] = value

    if flat_settings:
        warnings.warn(
            f"Passing {', '.join(flat_settings)} directly to generate()/generate_to_file() "
            "is deprecated; set them on CreateTtsConfig instead. This will be removed in the "
            "next major release.",
            DeprecationWarning,
            stacklevel=3,
        )

    _warn_deprecated_config_fields(
        config,
        ("model", "voice", "language"),
        "pass it directly to generate()/generate_to_file() instead",
    )

    # Start from the flat values and let the config overwrite them: config wins
    # for output settings. Neither source contributes None values, so a missing
    # key is the only "not provided" case from here on.
    merged = dict(flat_settings)
    if config:
        merged.update(config.model_dump(exclude_none=True))

    config_voice = merged.pop("voice", None)
    config_model = merged.pop("model", None)
    config_language = merged.pop("language", None)

    if config_voice is not None:
        voice = config_voice
    if config_model is not None:
        model = config_model

    # Language is part of the utterance, not an output setting: the flat argument
    # wins, a config value is honored next, and only then the deprecated default.
    resolved_language = language if language is not None else config_language
    if resolved_language is None:
        warnings.warn(
            "Relying on the default Text-to-Speech language 'en' is deprecated; pass "
            "language= explicitly. It will be required in the next major release.",
            DeprecationWarning,
            stacklevel=3,
        )
        resolved_language = "en"

    payload = {
        "text": text,
        "voice": voice,
        "model": model,
        "language": resolved_language,
        "audio_format": merged.get("audio_format", "wav"),
        "sample_rate": merged.get("sample_rate"),
        "bitrate": merged.get("bitrate"),
    }
    return CreateTtsPayload.model_validate(payload)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def build_translate_config(
    *,
    to: LanguageCode | None,
    source: LanguageCode | None,
    between: tuple[LanguageCode, LanguageCode] | None,
    config: CreateTranscriptionConfig | None,
) -> CreateTranscriptionConfig:
    """Return a copy of ``config`` with translation settings filled in from the kwargs.

    Exactly one of ``to`` (one-way translation) or ``between`` (two-way
    translation) must be given. ``source`` is only accepted together with ``to``
    and, when truthy, becomes the single strict language hint.
    ``enable_language_identification`` is always set to ``True``; all other
    fields of ``config`` are carried over. A fresh config is used when ``config``
    is not given.

    Raises:
        SonioxValidationError: If both or neither of ``to``/``between`` are
            given, or if ``source`` is given without ``to``.
    """
    if (to is not None) == (between is not None):
        raise SonioxValidationError("Provide exactly one of `to` or `between`")
    if to is None and source is not None:
        raise SonioxValidationError("`source` is only valid with `to`")

    if config:
        result = config.model_copy()
    else:
        result = CreateTranscriptionConfig()

    if to is None:
        assert between is not None  # validated above
        first, second = between
        result.translation = TranslationConfig(
            type="two_way", language_a=first, language_b=second
        )
    else:
        result.translation = TranslationConfig(type="one_way", target_language=to)
        if source:
            result.language_hints = [source]
            result.language_hints_strict = True

    result.enable_language_identification = True
    return result
|