soniox 2.5.0__tar.gz → 2.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {soniox-2.5.0 → soniox-2.7.0}/CHANGELOG.md +32 -0
  2. {soniox-2.5.0 → soniox-2.7.0}/PKG-INFO +2 -2
  3. {soniox-2.5.0 → soniox-2.7.0}/README.md +1 -1
  4. {soniox-2.5.0 → soniox-2.7.0}/docs/async_client.md +16 -8
  5. {soniox-2.5.0 → soniox-2.7.0}/docs/types.md +34 -15
  6. {soniox-2.5.0 → soniox-2.7.0}/examples/async_soniox_client/realtime_example.py +1 -1
  7. {soniox-2.5.0 → soniox-2.7.0}/examples/async_soniox_client/tts_realtime_example.py +1 -3
  8. {soniox-2.5.0 → soniox-2.7.0}/examples/async_soniox_client/tts_realtime_multiplexed_example.py +3 -8
  9. {soniox-2.5.0 → soniox-2.7.0}/examples/soniox_client/realtime_example.py +1 -1
  10. {soniox-2.5.0 → soniox-2.7.0}/examples/soniox_client/tts_realtime_multiplexed_example.py +3 -10
  11. {soniox-2.5.0 → soniox-2.7.0}/pyproject.toml +1 -1
  12. {soniox-2.5.0 → soniox-2.7.0}/scripts/generate_docs.py +19 -5
  13. soniox-2.7.0/src/soniox/api/_utils.py +227 -0
  14. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_stt.py +0 -1
  15. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_tts.py +27 -23
  16. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/stt.py +0 -1
  17. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/tts.py +28 -24
  18. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/async_tts.py +2 -12
  19. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/tts.py +2 -12
  20. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/types/api.py +61 -25
  21. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/types/realtime.py +14 -0
  22. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/_sdk_bindings.py +6 -7
  23. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_api.py +1 -1
  24. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_async_tts_workflows.py +9 -7
  25. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_coverage_gaps.py +2 -2
  26. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_translate.py +31 -3
  27. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_tts_workflows.py +72 -8
  28. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_types.py +15 -4
  29. {soniox-2.5.0 → soniox-2.7.0}/uv.lock +1 -1
  30. soniox-2.5.0/src/soniox/api/_utils.py +0 -118
  31. {soniox-2.5.0 → soniox-2.7.0}/.claude/settings.local.json +0 -0
  32. {soniox-2.5.0 → soniox-2.7.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  33. {soniox-2.5.0 → soniox-2.7.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  34. {soniox-2.5.0 → soniox-2.7.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  35. {soniox-2.5.0 → soniox-2.7.0}/.gitignore +0 -0
  36. {soniox-2.5.0 → soniox-2.7.0}/CODE_OF_CONDUCT.md +0 -0
  37. {soniox-2.5.0 → soniox-2.7.0}/CONTRIBUTING.md +0 -0
  38. {soniox-2.5.0 → soniox-2.7.0}/LICENSE +0 -0
  39. {soniox-2.5.0 → soniox-2.7.0}/SECURITY.md +0 -0
  40. {soniox-2.5.0 → soniox-2.7.0}/assets/coffee_shop.mp3 +0 -0
  41. {soniox-2.5.0 → soniox-2.7.0}/assets/coffee_shop.pcm_s16le +0 -0
  42. {soniox-2.5.0 → soniox-2.7.0}/assets/two_way_translation.mp3 +0 -0
  43. {soniox-2.5.0 → soniox-2.7.0}/assets/two_way_translation.pcm_s16le +0 -0
  44. {soniox-2.5.0 → soniox-2.7.0}/docs/realtime_client.md +0 -0
  45. {soniox-2.5.0 → soniox-2.7.0}/docs/utils.md +0 -0
  46. {soniox-2.5.0 → soniox-2.7.0}/examples/async_soniox_client/api_example.py +0 -0
  47. {soniox-2.5.0 → soniox-2.7.0}/examples/async_soniox_client/tts_api_example.py +0 -0
  48. {soniox-2.5.0 → soniox-2.7.0}/examples/soniox_client/api_example.py +0 -0
  49. {soniox-2.5.0 → soniox-2.7.0}/examples/soniox_client/tts_api_example.py +0 -0
  50. {soniox-2.5.0 → soniox-2.7.0}/examples/soniox_client/tts_realtime_example.py +0 -0
  51. {soniox-2.5.0 → soniox-2.7.0}/justfile +0 -0
  52. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/__init__.py +0 -0
  53. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/__init__.py +0 -0
  54. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_auth.py +0 -0
  55. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_concurrency_limits.py +0 -0
  56. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_files.py +0 -0
  57. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_models.py +0 -0
  58. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_tts_models.py +0 -0
  59. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_usage_logs.py +0 -0
  60. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/async_webhooks.py +0 -0
  61. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/auth.py +0 -0
  62. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/concurrency_limits.py +0 -0
  63. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/files.py +0 -0
  64. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/models.py +0 -0
  65. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/tts_models.py +0 -0
  66. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/usage_logs.py +0 -0
  67. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/api/webhooks.py +0 -0
  68. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/client.py +0 -0
  69. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/errors.py +0 -0
  70. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/py.typed +0 -0
  71. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/__init__.py +0 -0
  72. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/_constants.py +0 -0
  73. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/_utils.py +0 -0
  74. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/async_stt.py +0 -0
  75. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/realtime/stt.py +0 -0
  76. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/types/__init__.py +0 -0
  77. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/types/common.py +0 -0
  78. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/types/webhooks.py +0 -0
  79. {soniox-2.5.0 → soniox-2.7.0}/src/soniox/utils.py +0 -0
  80. {soniox-2.5.0 → soniox-2.7.0}/tests/__init__.py +0 -0
  81. {soniox-2.5.0 → soniox-2.7.0}/tests/conftest.py +0 -0
  82. {soniox-2.5.0 → soniox-2.7.0}/tests/data/openapi.json +0 -0
  83. {soniox-2.5.0 → soniox-2.7.0}/tests/helpers.py +0 -0
  84. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/__init__.py +0 -0
  85. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/cases.py +0 -0
  86. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/mock_ws.py +0 -0
  87. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_async_tts_realtime.py +0 -0
  88. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_chaos_realtime.py +0 -0
  89. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_fuzz_realtime.py +0 -0
  90. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_keepalive.py +0 -0
  91. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_live_ws.py +0 -0
  92. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_stt_realtime.py +0 -0
  93. {soniox-2.5.0 → soniox-2.7.0}/tests/realtime/test_tts_realtime.py +0 -0
  94. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/__init__.py +0 -0
  95. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/_openapi.py +0 -0
  96. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_async_client.py +0 -0
  97. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_async_stt_workflows.py +0 -0
  98. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_client.py +0 -0
  99. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_concurrency_limits.py +0 -0
  100. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_schema_drift.py +0 -0
  101. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_stt_extras.py +0 -0
  102. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_stt_workflows.py +0 -0
  103. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_sync_async_parity.py +0 -0
  104. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_usage_logs.py +0 -0
  105. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_utils.py +0 -0
  106. {soniox-2.5.0 → soniox-2.7.0}/tests/unit/test_webhooks.py +0 -0
@@ -73,6 +73,38 @@ Examples:
73
73
 
74
74
  ---
75
75
 
76
+ ## [2.7.0] - 24 jun 2026
77
+
78
+ ### Added
79
+
80
+ - `endpoint_latency_adjustment_level` field on `RealtimeSTTConfig` (integer 0–3) to fine-tune the latency/accuracy trade-off of realtime endpoint detection.
81
+
82
+ ### Changed
83
+
84
+ - TTS REST output settings (`audio_format`, `sample_rate`, `bitrate`) now live on `CreateTtsConfig`; `generate()` and `generate_to_file()` take the utterance's `text`, `voice`, `model`, and `language` directly. Each field now has a single home (no more flat-vs-config overlap). Existing flat-keyword calls keep working (see Deprecated).
85
+ - During the deprecation overlap, when a deprecated field is set both on the config and as a flat argument, the config value now takes precedence uniformly across STT and TTS (previously `client_reference_id` resolved the other way).
86
+
87
+ ### Deprecated
88
+
89
+ - Passing `audio_format`, `sample_rate`, or `bitrate` as keyword arguments to TTS `generate()` / `generate_to_file()` is deprecated; set them on `CreateTtsConfig` instead. The keyword arguments still work but emit a `DeprecationWarning` and will be removed in a future major release.
90
+ - Setting `model`, `voice`, or `language` on `CreateTtsConfig` is deprecated; pass them directly to `generate()` / `generate_to_file()` (they describe the utterance, not output encoding).
91
+ - Relying on the default TTS `language` (`"en"`) is deprecated; pass `language` explicitly to `generate()` / `generate_to_file()`. It will become a required argument in the next major release.
92
+ - Setting `model` or `client_reference_id` on `CreateTranscriptionConfig` is deprecated; pass them directly to the transcription `create*` calls.
93
+
94
+ ### Removed
95
+
96
+ - The internal module constants `DEFAULT_LANGUAGE` and `DEFAULT_AUDIO_FORMAT` in `soniox.api.tts` / `soniox.api.async_tts`. The defaults (`"en"` / `"wav"`) are now applied inside payload construction. Behavior is unchanged.
97
+
98
+ ---
99
+
100
+ ## [2.6.0] - 15 jun 2026
101
+
102
+ ### Added
103
+
104
+ - `endpoint_sensitivity` field on `RealtimeSTTConfig`: adjusts how likely the model is to emit a speech endpoint. Allowed values are between -1.0 and 1.0; the default is 0.0. Introduced in the Soniox v5 model.
105
+
106
+ ---
107
+
76
108
  ## [2.5.0] - 12 jun 2026
77
109
 
78
110
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: soniox
3
- Version: 2.5.0
3
+ Version: 2.7.0
4
4
  Summary: The official Python SDK for the Soniox API (STT, REST)
5
5
  Project-URL: Homepage, https://soniox.com
6
6
  Project-URL: Documentation, https://soniox.com/docs
@@ -111,7 +111,7 @@ from soniox.utils import render_tokens, throttle_audio, start_audio_thread
111
111
  DEMO_FILE = "path_to_your_audio_file"
112
112
 
113
113
  client = SonioxClient()
114
- config = RealtimeSTTConfig(model="stt-rt-v4", audio_format="mp3")
114
+ config = RealtimeSTTConfig(model="stt-rt-v5", audio_format="mp3")
115
115
  final_tokens: list[Token] = []
116
116
  non_final_tokens: list[Token] = []
117
117
 
@@ -68,7 +68,7 @@ from soniox.utils import render_tokens, throttle_audio, start_audio_thread
68
68
  DEMO_FILE = "path_to_your_audio_file"
69
69
 
70
70
  client = SonioxClient()
71
- config = RealtimeSTTConfig(model="stt-rt-v4", audio_format="mp3")
71
+ config = RealtimeSTTConfig(model="stt-rt-v5", audio_format="mp3")
72
72
  final_tokens: list[Token] = []
73
73
  non_final_tokens: list[Token] = []
74
74
 
@@ -1245,13 +1245,17 @@ AsyncTtsAPI(client: AsyncSonioxClient)
1245
1245
  ### generate()
1246
1246
 
1247
1247
  ```python
1248
- generate(*, text: str, voice: str, model: str = DEFAULT_MODEL, language: str = DEFAULT_LANGUAGE, audio_format: TtsAudioFormat = DEFAULT_AUDIO_FORMAT, sample_rate: TtsSampleRate | None = None, bitrate: TtsBitrate | None = None, config: CreateTtsConfig | None = None) -> bytes
1248
+ generate(*, text: str, voice: str, model: str = DEFAULT_MODEL, config: CreateTtsConfig | None = None, language: str | None = None, audio_format: TtsAudioFormat | None = None, sample_rate: TtsSampleRate | None = None, bitrate: TtsBitrate | None = None) -> bytes
1249
1249
  ```
1250
1250
 
1251
1251
  Generate speech audio from text and return raw audio bytes.
1252
1252
 
1253
1253
  Performs a POST request to the TTS REST endpoint.
1254
1254
 
1255
+ ``audio_format``/``sample_rate``/``bitrate`` are deprecated; set them on
1256
+ ``CreateTtsConfig`` instead. Pass ``language`` explicitly — relying on the default
1257
+ ("en") is deprecated and ``language`` will be required in the next major release.
1258
+
1255
1259
  **Parameters**
1256
1260
 
1257
1261
  | Parameter | Type | Description |
@@ -1259,11 +1263,11 @@ Performs a POST request to the TTS REST endpoint.
1259
1263
  | `text` | `str` | Text to synthesize into speech. |
1260
1264
  | `voice` | `str` | Voice identifier to generate speech audio with. |
1261
1265
  | `model` | `str` | Text-to-Speech model to use. |
1262
- | `language` | `str` | Language code for Text-to-Speech (e.g., "en"). |
1263
- | `audio_format` | `TtsAudioFormat` | Audio format for realtime transcription. |
1266
+ | `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
1267
+ | `language` | `str \| None` | Language code for Text-to-Speech (e.g., "en"). |
1268
+ | `audio_format` | `TtsAudioFormat \| None` | Requested output audio format. |
1264
1269
  | `sample_rate` | `TtsSampleRate \| None` | Audio sample rate in Hz. |
1265
1270
  | `bitrate` | `TtsBitrate \| None` | Output bitrate in bits-per-second for compressed formats. |
1266
- | `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
1267
1271
 
1268
1272
  **Returns**
1269
1273
 
@@ -1280,11 +1284,15 @@ Performs a POST request to the TTS REST endpoint.
1280
1284
  ### generate_to_file()
1281
1285
 
1282
1286
  ```python
1283
- generate_to_file(output: BinaryIO | Path | str, *, text: str, voice: str = DEFAULT_VOICE, model: str = DEFAULT_MODEL, language: str = DEFAULT_LANGUAGE, audio_format: TtsAudioFormat = DEFAULT_AUDIO_FORMAT, sample_rate: TtsSampleRate | None = None, bitrate: TtsBitrate | None = None, config: CreateTtsConfig | None = None) -> int
1287
+ generate_to_file(output: BinaryIO | Path | str, *, text: str, voice: str = DEFAULT_VOICE, model: str = DEFAULT_MODEL, config: CreateTtsConfig | None = None, language: str | None = None, audio_format: TtsAudioFormat | None = None, sample_rate: TtsSampleRate | None = None, bitrate: TtsBitrate | None = None) -> int
1284
1288
  ```
1285
1289
 
1286
1290
  Generate speech audio from text and write the audio bytes to a file-like output.
1287
1291
 
1292
+ ``audio_format``/``sample_rate``/``bitrate`` are deprecated; set them on
1293
+ ``CreateTtsConfig`` instead. Pass ``language`` explicitly — relying on the default
1294
+ ("en") is deprecated and ``language`` will be required in the next major release.
1295
+
1288
1296
  **Parameters**
1289
1297
 
1290
1298
  | Parameter | Type | Description |
@@ -1293,11 +1301,11 @@ Generate speech audio from text and write the audio bytes to a file-like output.
1293
1301
  | `text` | `str` | Text to synthesize into speech. |
1294
1302
  | `voice` | `str` | Voice identifier to generate speech audio with. |
1295
1303
  | `model` | `str` | Text-to-Speech model to use. |
1296
- | `language` | `str` | Language code for Text-to-Speech (e.g., "en"). |
1297
- | `audio_format` | `TtsAudioFormat` | Audio format for realtime transcription. |
1304
+ | `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
1305
+ | `language` | `str \| None` | Language code for Text-to-Speech (e.g., "en"). |
1306
+ | `audio_format` | `TtsAudioFormat \| None` | Requested output audio format. |
1298
1307
  | `sample_rate` | `TtsSampleRate \| None` | Audio sample rate in Hz. |
1299
1308
  | `bitrate` | `TtsBitrate \| None` | Output bitrate in bits-per-second for compressed formats. |
1300
- | `config` | `CreateTtsConfig \| None` | Configuration options for this operation. |
1301
1309
 
1302
1310
  **Returns**
1303
1311
 
@@ -171,9 +171,9 @@ Helper config used when building Text-to-Speech payloads.
171
171
 
172
172
  | Property | Type | Description |
173
173
  | ------ | ------ | ------ |
174
- | `model` | `str \| None` | Text-to-Speech model to use. |
175
- | `language` | `str \| None` | Language code for Text-to-Speech (e.g., "en"). |
176
- | `voice` | `str \| None` | Voice identifier to generate speech audio with. |
174
+ | `model` | `str \| None` | Deprecated: pass ``model`` to generate()/generate_to_file() instead. |
175
+ | `language` | `str \| None` | Deprecated: pass ``language`` to generate()/generate_to_file() instead. |
176
+ | `voice` | `str \| None` | Deprecated: pass ``voice`` to generate()/generate_to_file() instead. |
177
177
  | `audio_format` | `TtsAudioFormat \| None` | Requested output audio format. |
178
178
  | `sample_rate` | `TtsSampleRate \| None` | Output sample rate in Hz. |
179
179
  | `bitrate` | `TtsBitrate \| None` | Output bitrate in bits-per-second for compressed formats. |
@@ -216,7 +216,7 @@ Helper config used when building transcription payloads.
216
216
 
217
217
  | Property | Type | Description |
218
218
  | ------ | ------ | ------ |
219
- | `model` | `str \| None` | Speech-to-text model to use. |
219
+ | `model` | `str \| None` | Deprecated: pass ``model`` to the create call instead. |
220
220
  | `language_hints` | `list[LanguageCode] \| None` | Array of expected ISO language codes to bias recognition. |
221
221
  | `language_hints_strict` | `bool \| None` | When true, model relies more heavily on language hints. |
222
222
  | `enable_speaker_diarization` | `bool \| None` | Enable speaker diarization to identify different speakers. |
@@ -226,7 +226,7 @@ Helper config used when building transcription payloads.
226
226
  | `webhook_url` | `str \| None` | URL to receive webhook notifications when transcription is completed or fails. |
227
227
  | `webhook_auth_header_name` | `str \| None` | Name of the authentication header sent with webhook notifications |
228
228
  | `webhook_auth_header_value` | `str \| None` | Authentication header value sent with webhook notifications |
229
- | `client_reference_id` | `str \| None` | Optional tracking identifier |
229
+ | `client_reference_id` | `str \| None` | Deprecated: pass ``client_reference_id`` to the create call instead. |
230
230
 
231
231
  ---
232
232
 
@@ -502,7 +502,15 @@ Audio formats accepted by the realtime STT websocket.
502
502
 
503
503
  ```python
504
504
  RealtimeSTTHeaderFormat = Literal[
505
- "aac", "aiff", "amr", "asf", "flac", "mp3", "ogg", "wav", "webm",
505
+ "aac",
506
+ "aiff",
507
+ "amr",
508
+ "asf",
509
+ "flac",
510
+ "mp3",
511
+ "ogg",
512
+ "wav",
513
+ "webm",
506
514
  ]
507
515
  ```
508
516
 
@@ -517,16 +525,25 @@ Container formats whose header carries sample rate and channels.
517
525
  ```python
518
526
  RealtimeSTTRawFormat = Literal[
519
527
  "pcm_s8",
520
- "pcm_s16le", "pcm_s16be",
521
- "pcm_s24le", "pcm_s24be",
522
- "pcm_s32le", "pcm_s32be",
528
+ "pcm_s16le",
529
+ "pcm_s16be",
530
+ "pcm_s24le",
531
+ "pcm_s24be",
532
+ "pcm_s32le",
533
+ "pcm_s32be",
523
534
  "pcm_u8",
524
- "pcm_u16le", "pcm_u16be",
525
- "pcm_u24le", "pcm_u24be",
526
- "pcm_u32le", "pcm_u32be",
527
- "pcm_f32le", "pcm_f32be",
528
- "pcm_f64le", "pcm_f64be",
529
- "mulaw", "alaw",
535
+ "pcm_u16le",
536
+ "pcm_u16be",
537
+ "pcm_u24le",
538
+ "pcm_u24be",
539
+ "pcm_u32le",
540
+ "pcm_u32be",
541
+ "pcm_f32le",
542
+ "pcm_f32be",
543
+ "pcm_f64le",
544
+ "pcm_f64be",
545
+ "mulaw",
546
+ "alaw",
530
547
  ]
531
548
  ```
532
549
 
@@ -992,6 +1009,8 @@ Configuration for initiating a realtime transcription session.
992
1009
  | `enable_language_identification` | `bool \| None` | Enable automatic language detection. |
993
1010
  | `enable_endpoint_detection` | `bool \| None` | Enable endpoint detection for utterance boundaries. |
994
1011
  | `max_endpoint_delay_ms` | `int \| None` | Maximum delay between the end of speech and returned endpoint. Allowed values for maximum delay are between 500ms and 3000ms. The default value is 2000ms |
1012
+ | `endpoint_sensitivity` | `float \| None` | Adjusts how likely the model is to emit an endpoint. Higher values make endpoints more likely (finalizing sooner); lower values make them less likely. Allowed values are between -1.0 and 1.0; the default is 0.0. Introduced in the Soniox v5 model; earlier models reject it. |
1013
+ | `endpoint_latency_adjustment_level` | `int \| None` | Fine-tunes the latency/accuracy trade-off of endpoint detection. Allowed values are integers from 0 to 3. |
995
1014
  | `translation` | `TranslationConfigInput \| None` | Translation configuration. |
996
1015
  | `client_reference_id` | `str \| None` | Optional tracking identifier (max 256 chars). |
997
1016
 
@@ -10,7 +10,7 @@ DEMO_FILE = Path(__file__).resolve().parents[2] / "assets" / "coffee_shop.mp3"
10
10
 
11
11
  async def main() -> None:
12
12
  client = AsyncSonioxClient()
13
- config = RealtimeSTTConfig(model="stt-rt-v4", audio_format="mp3")
13
+ config = RealtimeSTTConfig(model="stt-rt-v5", audio_format="mp3")
14
14
  final_tokens: list[Token] = []
15
15
  non_final_tokens: list[Token] = []
16
16
  async with client.realtime.stt.connect(config=config) as session:
@@ -40,9 +40,7 @@ async def main() -> None:
40
40
  try:
41
41
  async with client.realtime.tts.connect(config=config) as connection:
42
42
  send_task = asyncio.create_task(
43
- connection.send_text_chunks(
44
- _iter_text_chunks(TEXT_CHUNKS), text_end=True
45
- ),
43
+ connection.send_text_chunks(_iter_text_chunks(TEXT_CHUNKS), text_end=True),
46
44
  name="tts-async-sender",
47
45
  )
48
46
  try:
@@ -69,9 +69,7 @@ def write_outputs(audio_by_stream: dict[str, bytes]) -> None:
69
69
  )
70
70
  if audio:
71
71
  output_path.write_bytes(audio)
72
- print(
73
- f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}"
74
- )
72
+ print(f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}")
75
73
  else:
76
74
  print(f"No audio file was written for stream {key.upper()}.")
77
75
 
@@ -96,8 +94,7 @@ async def main() -> None:
96
94
  try:
97
95
  async with client.realtime.tts.connect_multi_stream() as connection:
98
96
  streams = {
99
- key: await connection.open_stream(config=configs[key])
100
- for key in sorted(configs)
97
+ key: await connection.open_stream(config=configs[key]) for key in sorted(configs)
101
98
  }
102
99
 
103
100
  receiver_tasks = [
@@ -124,9 +121,7 @@ async def main() -> None:
124
121
  if isinstance(result, BaseException):
125
122
  errors.append(result)
126
123
 
127
- receiver_results = await asyncio.gather(
128
- *receiver_tasks, return_exceptions=True
129
- )
124
+ receiver_results = await asyncio.gather(*receiver_tasks, return_exceptions=True)
130
125
  for key, result in zip(streams.keys(), receiver_results, strict=True):
131
126
  if isinstance(result, BaseException):
132
127
  errors.append(result)
@@ -9,7 +9,7 @@ DEMO_FILE = Path(__file__).resolve().parents[2] / "assets" / "coffee_shop.mp3"
9
9
 
10
10
  def main() -> None:
11
11
  client = SonioxClient()
12
- config = RealtimeSTTConfig(model="stt-rt-v4", audio_format="mp3")
12
+ config = RealtimeSTTConfig(model="stt-rt-v5", audio_format="mp3")
13
13
  final_tokens: list[Token] = []
14
14
  non_final_tokens: list[Token] = []
15
15
  with client.realtime.stt.connect(config=config) as session:
@@ -74,9 +74,7 @@ def write_outputs(audio_by_stream: dict[str, bytes]) -> None:
74
74
  )
75
75
  if audio:
76
76
  output_path.write_bytes(audio)
77
- print(
78
- f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}"
79
- )
77
+ print(f"Wrote stream {key.upper()} ({len(audio)} bytes) to {output_path.resolve()}")
80
78
  else:
81
79
  print(f"No audio file was written for stream {key.upper()}.")
82
80
 
@@ -101,10 +99,7 @@ def main() -> None:
101
99
 
102
100
  try:
103
101
  with client.realtime.tts.connect_multi_stream() as connection:
104
- streams = {
105
- key: connection.open_stream(config=configs[key])
106
- for key in sorted(configs)
107
- }
102
+ streams = {key: connection.open_stream(config=configs[key]) for key in sorted(configs)}
108
103
 
109
104
  receiver_threads = [
110
105
  threading.Thread(
@@ -135,9 +130,7 @@ def main() -> None:
135
130
 
136
131
  with errors_lock:
137
132
  for exc in errors:
138
- print(
139
- "Realtime multiplexed TTS error (keeping partial audio):", exc
140
- )
133
+ print("Realtime multiplexed TTS error (keeping partial audio):", exc)
141
134
  finally:
142
135
  write_outputs(audio_by_stream)
143
136
  client.close()
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "soniox"
7
- version = "2.5.0"
7
+ version = "2.7.0"
8
8
  dependencies = ["httpx>0.25.0", "websockets>11.0", "pydantic>2"]
9
9
  requires-python = ">=3.10"
10
10
  authors = [{ name = "Soniox", email = "support@soniox.com" }]
@@ -281,7 +281,9 @@ def parse_raises_entries(lines: list[str]) -> list[tuple[str, str]]:
281
281
  match = re.match(r"^\s*([A-Za-z_][\w\.\[\], ]*)\s*:\s*(.*)$", line)
282
282
  if match:
283
283
  if current_name is not None:
284
- entries.append((current_name.strip(), clean_paragraph_block("\n".join(current_desc))))
284
+ entries.append(
285
+ (current_name.strip(), clean_paragraph_block("\n".join(current_desc)))
286
+ )
285
287
  current_name = match.group(1)
286
288
  current_desc = [match.group(2).strip()]
287
289
  elif current_name is not None:
@@ -343,7 +345,11 @@ def get_parsed_doc(obj: Object) -> ParsedDoc:
343
345
 
344
346
 
345
347
  def extract_name_target(node: ast.AST) -> str | None:
346
- if isinstance(node, ast.Assign) and len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
348
+ if (
349
+ isinstance(node, ast.Assign)
350
+ and len(node.targets) == 1
351
+ and isinstance(node.targets[0], ast.Name)
352
+ ):
347
353
  return node.targets[0].id
348
354
  if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
349
355
  return node.target.id
@@ -434,7 +440,13 @@ def parse_dunder_all(path: Path) -> list[str]:
434
440
  name = extract_name_target(stmt)
435
441
  if name != "__all__":
436
442
  continue
437
- value = stmt.value if isinstance(stmt, ast.Assign) else stmt.value if isinstance(stmt, ast.AnnAssign) else None
443
+ value = (
444
+ stmt.value
445
+ if isinstance(stmt, ast.Assign)
446
+ else stmt.value
447
+ if isinstance(stmt, ast.AnnAssign)
448
+ else None
449
+ )
438
450
  if not isinstance(value, (ast.List, ast.Tuple)):
439
451
  continue
440
452
  exports: list[str] = []
@@ -593,7 +605,9 @@ def get_call_name(call: ast.Call) -> str | None:
593
605
  return None
594
606
 
595
607
 
596
- def iter_function_nodes(tree: ast.Module) -> list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]]:
608
+ def iter_function_nodes(
609
+ tree: ast.Module,
610
+ ) -> list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]]:
597
611
  nodes: list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]] = []
598
612
  for stmt in tree.body:
599
613
  if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
@@ -695,7 +709,7 @@ def format_constructor_signature(cls: Class, constructor: Function) -> str:
695
709
  sig_text = str(constructor.signature())
696
710
  match = re.match(r"^__init__\((.*)\)\s*->\s*None$", sig_text)
697
711
  if not match:
698
- return f"{cls.name}{sig_text[sig_text.find('('):]}"
712
+ return f"{cls.name}{sig_text[sig_text.find('(') :]}"
699
713
  params = match.group(1).strip()
700
714
  if params.startswith("self, "):
701
715
  params = params[len("self, ") :]
@@ -0,0 +1,227 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import warnings
5
+ from pathlib import Path
6
+ from typing import BinaryIO, TypeVar
7
+
8
+ import httpx
9
+ from pydantic import BaseModel
10
+
11
+ from ..errors import SonioxAPIError, SonioxValidationError
12
+ from ..types import (
13
+ CreateTranscriptionConfig,
14
+ CreateTranscriptionPayload,
15
+ CreateTtsConfig,
16
+ CreateTtsPayload,
17
+ LanguageCode,
18
+ TranslationConfig,
19
+ )
20
+
21
+ ModelT = TypeVar("ModelT", bound=BaseModel)
22
+
23
+
24
+ def _warn_deprecated_config_fields(
25
+ config: BaseModel | None, names: tuple[str, ...], advice: str
26
+ ) -> None:
27
+ """Emit a DeprecationWarning if any of ``names`` was explicitly set on ``config``.
28
+
29
+ Read via ``model_fields_set`` (not attribute access) so it never fires for
30
+ unset fields and never double-warns with pydantic's own ``deprecated=`` hook.
31
+ """
32
+ if config is None:
33
+ return
34
+ used = [n for n in names if n in config.model_fields_set]
35
+ if used:
36
+ warnings.warn(
37
+ f"Setting {', '.join(used)} on {type(config).__name__} is deprecated; "
38
+ f"{advice}. This will be removed in the next major release.",
39
+ DeprecationWarning,
40
+ stacklevel=3,
41
+ )
42
+
43
+
44
+ def ensure_success(response: httpx.Response) -> None:
45
+ if response.is_error:
46
+ raise SonioxAPIError.from_response(response)
47
+
48
+
49
+ def parse_response(response: httpx.Response, model: type[ModelT]) -> ModelT:
50
+ ensure_success(response)
51
+ payload = response.json()
52
+ return model.model_validate(payload)
53
+
54
+
55
+ async def parse_async_response(response: httpx.Response, model: type[ModelT]) -> ModelT:
56
+ ensure_success(response)
57
+ payload = response.json()
58
+ return model.model_validate(payload)
59
+
60
+
61
+ def normalize_file(
62
+ file: BinaryIO | bytes | Path | str,
63
+ filename: str | None = None,
64
+ ) -> tuple[BinaryIO, str, bool]:
65
+ """Return (file-like, filename, should_close) tuple for upload."""
66
+ if isinstance(file, bytes | bytearray):
67
+ file_obj = io.BytesIO(file)
68
+ effective_name = filename or "upload.bin"
69
+ return file_obj, effective_name, True
70
+
71
+ if isinstance(file, Path):
72
+ file_obj = file.open("rb")
73
+ effective_name = filename or file.name
74
+ return file_obj, effective_name, True
75
+
76
+ if isinstance(file, str):
77
+ return normalize_file(Path(file), filename=filename)
78
+
79
+ if isinstance(file, io.IOBase):
80
+ effective_name = filename or getattr(file, "name", "upload.bin")
81
+ return file, effective_name, False
82
+
83
+ raise TypeError("file must be bytes, Path, or file-like stream.")
84
+
85
+
86
+ def build_create_payload(
87
+ *,
88
+ model: str,
89
+ file_id: str | None,
90
+ audio_url: str | None,
91
+ client_reference_id: str | None,
92
+ config: CreateTranscriptionConfig | None,
93
+ ) -> CreateTranscriptionPayload:
94
+ _warn_deprecated_config_fields(
95
+ config,
96
+ ("model", "client_reference_id"),
97
+ "pass it directly to the create call instead",
98
+ )
99
+ config_data = config.model_dump(exclude_none=True) if config else {}
100
+ model_override = config_data.pop("model", None)
101
+ client_ref_override = config_data.pop("client_reference_id", None)
102
+ return CreateTranscriptionPayload.model_validate(
103
+ {
104
+ "model": model_override if model_override is not None else model,
105
+ "file_id": file_id,
106
+ "audio_url": audio_url,
107
+ "client_reference_id": (
108
+ client_ref_override if client_ref_override is not None else client_reference_id
109
+ ),
110
+ **config_data,
111
+ }
112
+ )
113
+
114
+
115
def build_tts_payload(
    *,
    text: str,
    voice: str,
    model: str,
    config: CreateTtsConfig | None,
    language: str | None = None,
    audio_format: str | None = None,
    sample_rate: int | None = None,
    bitrate: int | None = None,
) -> CreateTtsPayload:
    """Build a validated TTS payload from flat identity arguments and a ``config`` bag.

    Identity (``text``/``voice``/``model``/``language``) is passed flat; output settings
    (``audio_format``/``sample_rate``/``bitrate``) belong on ``config``.

    Deprecated, kept one release: the flat ``audio_format``/``sample_rate``/``bitrate``
    kwargs (move them to ``config``); ``model``/``voice``/``language`` set on ``config``
    (pass them flat); and omitting ``language`` — it will be required in the next major
    release.
    """
    # ponytail: deprecation shim. Next major — make `language` required (drop its default +
    # the omit-language warn), delete the flat audio_format/sample_rate/bitrate kwargs + their
    # warn block, drop model/voice/language from CreateTtsConfig + the
    # _warn_deprecated_config_fields call + the override dance. Body collapses to:
    # model_dump → defaults → model_validate.
    # Don't delete _warn_deprecated_config_fields or `import warnings` until STT's shim goes too.

    # Collect only the flat output settings the caller actually supplied.
    flat_settings: dict[str, str | int] = {}
    for key, value in (
        ("audio_format", audio_format),
        ("sample_rate", sample_rate),
        ("bitrate", bitrate),
    ):
        if value is not None:
            flat_settings[key] = value

    if flat_settings:
        supplied = ", ".join(flat_settings)
        warnings.warn(
            f"Passing {supplied} directly to generate()/generate_to_file() "
            "is deprecated; set them on CreateTtsConfig instead. This will be removed in the "
            "next major release.",
            DeprecationWarning,
            stacklevel=3,
        )

    _warn_deprecated_config_fields(
        config,
        ("model", "voice", "language"),
        "pass it directly to generate()/generate_to_file() instead",
    )

    config_values = config.model_dump(exclude_none=True) if config else {}
    # Config is layered on top of the flat kwargs, so config wins for output settings.
    merged = dict(flat_settings)
    merged.update(config_values)
    # `merged` never stores None (exclude_none + the filter above), so a missing key is the
    # only fallback case: use pop/get defaults and `is None`, never a falsy `or`.
    config_voice = merged.pop("voice", None)
    config_model = merged.pop("model", None)
    config_language = merged.pop("language", None)

    # language is identity, not a setting: the flat argument wins, a config value is
    # honored (deprecated) next, and falling back to "en" is itself deprecated.
    chosen_language = language if language is not None else config_language
    if chosen_language is None:
        warnings.warn(
            "Relying on the default Text-to-Speech language 'en' is deprecated; pass "
            "language= explicitly. It will be required in the next major release.",
            DeprecationWarning,
            stacklevel=3,
        )
        chosen_language = "en"

    payload = {
        "text": text,
        "voice": config_voice if config_voice is not None else voice,
        "model": config_model if config_model is not None else model,
        "language": chosen_language,
        "audio_format": merged.get("audio_format", "wav"),
        "sample_rate": merged.get("sample_rate"),
        "bitrate": merged.get("bitrate"),
    }
    return CreateTtsPayload.model_validate(payload)
195
+
196
+
197
def build_translate_config(
    *,
    to: LanguageCode | None,
    source: LanguageCode | None,
    between: tuple[LanguageCode, LanguageCode] | None,
    config: CreateTranscriptionConfig | None,
) -> CreateTranscriptionConfig:
    """Return a config with translation and language fields populated from the kwargs.

    Requires exactly one of ``to`` or ``between``. ``source`` is only valid with ``to``
    and is passed as a strict language hint. Forces ``enable_language_identification=True``.
    Other config fields are preserved.

    Args:
        to: Target language for one-way translation.
        source: Optional source language; when given (not None) it becomes the sole
            entry of ``language_hints`` and ``language_hints_strict`` is set to True.
        between: Language pair for two-way translation.
        config: Optional base config. The fields are set on a copy obtained from
            ``config.model_copy()``; a fresh ``CreateTranscriptionConfig`` is used
            when no config is given.

    Returns:
        The populated ``CreateTranscriptionConfig``.

    Raises:
        SonioxValidationError: If neither or both of ``to`` and ``between`` are given,
            or if ``source`` is given without ``to``.
    """
    if (to is None) == (between is None):
        raise SonioxValidationError("Provide exactly one of `to` or `between`")
    if source is not None and to is None:
        raise SonioxValidationError("`source` is only valid with `to`")

    base = config.model_copy() if config else CreateTranscriptionConfig()
    if to is not None:
        base.translation = TranslationConfig(type="one_way", target_language=to)
        # Use the same "provided" test as the validation above (is-None, not truthiness)
        # so a supplied source is never silently dropped.
        if source is not None:
            base.language_hints = [source]
            base.language_hints_strict = True
    else:
        # Type narrowing only: the exactly-one check above guarantees this.
        assert between is not None  # validated above
        a, b = between
        base.translation = TranslationConfig(type="two_way", language_a=a, language_b=b)

    base.enable_language_identification = True
    return base
@@ -694,4 +694,3 @@ class AsyncSttAPI:
694
694
  wait_timeout_sec=wait_timeout_sec,
695
695
  config=build_translate_config(to=to, source=source, between=between, config=config),
696
696
  )
697
-