together 1.5.20__tar.gz → 1.5.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {together-1.5.20 → together-1.5.21}/PKG-INFO +1 -1
  2. {together-1.5.20 → together-1.5.21}/pyproject.toml +1 -1
  3. {together-1.5.20 → together-1.5.21}/src/together/client.py +2 -0
  4. together-1.5.21/src/together/resources/audio/__init__.py +42 -0
  5. together-1.5.21/src/together/resources/audio/transcriptions.py +266 -0
  6. together-1.5.21/src/together/resources/audio/translations.py +276 -0
  7. {together-1.5.20 → together-1.5.21}/src/together/types/__init__.py +16 -0
  8. {together-1.5.20 → together-1.5.21}/src/together/types/audio_speech.py +82 -7
  9. together-1.5.20/src/together/resources/audio/__init__.py +0 -24
  10. {together-1.5.20 → together-1.5.21}/LICENSE +0 -0
  11. {together-1.5.20 → together-1.5.21}/README.md +0 -0
  12. {together-1.5.20 → together-1.5.21}/src/together/__init__.py +0 -0
  13. {together-1.5.20 → together-1.5.21}/src/together/abstract/__init__.py +0 -0
  14. {together-1.5.20 → together-1.5.21}/src/together/abstract/api_requestor.py +0 -0
  15. {together-1.5.20 → together-1.5.21}/src/together/cli/__init__.py +0 -0
  16. {together-1.5.20 → together-1.5.21}/src/together/cli/api/__init__.py +0 -0
  17. {together-1.5.20 → together-1.5.21}/src/together/cli/api/chat.py +0 -0
  18. {together-1.5.20 → together-1.5.21}/src/together/cli/api/completions.py +0 -0
  19. {together-1.5.20 → together-1.5.21}/src/together/cli/api/endpoints.py +0 -0
  20. {together-1.5.20 → together-1.5.21}/src/together/cli/api/files.py +0 -0
  21. {together-1.5.20 → together-1.5.21}/src/together/cli/api/finetune.py +0 -0
  22. {together-1.5.20 → together-1.5.21}/src/together/cli/api/images.py +0 -0
  23. {together-1.5.20 → together-1.5.21}/src/together/cli/api/models.py +0 -0
  24. {together-1.5.20 → together-1.5.21}/src/together/cli/api/utils.py +0 -0
  25. {together-1.5.20 → together-1.5.21}/src/together/cli/cli.py +0 -0
  26. {together-1.5.20 → together-1.5.21}/src/together/constants.py +0 -0
  27. {together-1.5.20 → together-1.5.21}/src/together/error.py +0 -0
  28. {together-1.5.20 → together-1.5.21}/src/together/filemanager.py +0 -0
  29. {together-1.5.20 → together-1.5.21}/src/together/legacy/__init__.py +0 -0
  30. {together-1.5.20 → together-1.5.21}/src/together/legacy/base.py +0 -0
  31. {together-1.5.20 → together-1.5.21}/src/together/legacy/complete.py +0 -0
  32. {together-1.5.20 → together-1.5.21}/src/together/legacy/embeddings.py +0 -0
  33. {together-1.5.20 → together-1.5.21}/src/together/legacy/files.py +0 -0
  34. {together-1.5.20 → together-1.5.21}/src/together/legacy/finetune.py +0 -0
  35. {together-1.5.20 → together-1.5.21}/src/together/legacy/images.py +0 -0
  36. {together-1.5.20 → together-1.5.21}/src/together/legacy/models.py +0 -0
  37. {together-1.5.20 → together-1.5.21}/src/together/resources/__init__.py +0 -0
  38. {together-1.5.20 → together-1.5.21}/src/together/resources/audio/speech.py +0 -0
  39. {together-1.5.20 → together-1.5.21}/src/together/resources/batch.py +0 -0
  40. {together-1.5.20 → together-1.5.21}/src/together/resources/chat/__init__.py +0 -0
  41. {together-1.5.20 → together-1.5.21}/src/together/resources/chat/completions.py +0 -0
  42. {together-1.5.20 → together-1.5.21}/src/together/resources/code_interpreter.py +0 -0
  43. {together-1.5.20 → together-1.5.21}/src/together/resources/completions.py +0 -0
  44. {together-1.5.20 → together-1.5.21}/src/together/resources/embeddings.py +0 -0
  45. {together-1.5.20 → together-1.5.21}/src/together/resources/endpoints.py +0 -0
  46. {together-1.5.20 → together-1.5.21}/src/together/resources/files.py +0 -0
  47. {together-1.5.20 → together-1.5.21}/src/together/resources/finetune.py +0 -0
  48. {together-1.5.20 → together-1.5.21}/src/together/resources/images.py +0 -0
  49. {together-1.5.20 → together-1.5.21}/src/together/resources/models.py +0 -0
  50. {together-1.5.20 → together-1.5.21}/src/together/resources/rerank.py +0 -0
  51. {together-1.5.20 → together-1.5.21}/src/together/together_response.py +0 -0
  52. {together-1.5.20 → together-1.5.21}/src/together/types/abstract.py +0 -0
  53. {together-1.5.20 → together-1.5.21}/src/together/types/batch.py +0 -0
  54. {together-1.5.20 → together-1.5.21}/src/together/types/chat_completions.py +0 -0
  55. {together-1.5.20 → together-1.5.21}/src/together/types/code_interpreter.py +0 -0
  56. {together-1.5.20 → together-1.5.21}/src/together/types/common.py +0 -0
  57. {together-1.5.20 → together-1.5.21}/src/together/types/completions.py +0 -0
  58. {together-1.5.20 → together-1.5.21}/src/together/types/embeddings.py +0 -0
  59. {together-1.5.20 → together-1.5.21}/src/together/types/endpoints.py +0 -0
  60. {together-1.5.20 → together-1.5.21}/src/together/types/error.py +0 -0
  61. {together-1.5.20 → together-1.5.21}/src/together/types/files.py +0 -0
  62. {together-1.5.20 → together-1.5.21}/src/together/types/finetune.py +0 -0
  63. {together-1.5.20 → together-1.5.21}/src/together/types/images.py +0 -0
  64. {together-1.5.20 → together-1.5.21}/src/together/types/models.py +0 -0
  65. {together-1.5.20 → together-1.5.21}/src/together/types/rerank.py +0 -0
  66. {together-1.5.20 → together-1.5.21}/src/together/utils/__init__.py +0 -0
  67. {together-1.5.20 → together-1.5.21}/src/together/utils/_log.py +0 -0
  68. {together-1.5.20 → together-1.5.21}/src/together/utils/api_helpers.py +0 -0
  69. {together-1.5.20 → together-1.5.21}/src/together/utils/files.py +0 -0
  70. {together-1.5.20 → together-1.5.21}/src/together/utils/tools.py +0 -0
  71. {together-1.5.20 → together-1.5.21}/src/together/version.py +0 -0
--- together-1.5.20/PKG-INFO
+++ together-1.5.21/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.5.20
+Version: 1.5.21
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
--- together-1.5.20/pyproject.toml
+++ together-1.5.21/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.20"
+version = "1.5.21"
 authors = ["Together AI <support@together.ai>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
--- together-1.5.20/src/together/client.py
+++ together-1.5.21/src/together/client.py
@@ -103,6 +103,7 @@ class AsyncTogether:
     models: resources.AsyncModels
     fine_tuning: resources.AsyncFineTuning
     rerank: resources.AsyncRerank
+    audio: resources.AsyncAudio
     code_interpreter: CodeInterpreter
     batches: resources.AsyncBatches
     # client options
@@ -167,6 +168,7 @@ class AsyncTogether:
         self.models = resources.AsyncModels(self.client)
         self.fine_tuning = resources.AsyncFineTuning(self.client)
         self.rerank = resources.AsyncRerank(self.client)
+        self.audio = resources.AsyncAudio(self.client)
         self.code_interpreter = CodeInterpreter(self.client)
         self.batches = resources.AsyncBatches(self.client)
 
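These two one-line additions wire the new audio resource into AsyncTogether (the synchronous Together client already exposed an audio attribute in 1.5.20 for speech). A minimal usage sketch of what the change enables, assuming TOGETHER_API_KEY is set in the environment; the file name is illustrative, not from this diff:

    import asyncio

    from together import AsyncTogether


    async def main() -> None:
        client = AsyncTogether()
        # New in 1.5.21: transcriptions (and translations) hang off client.audio.
        result = await client.audio.transcriptions.create(file="meeting.mp3")
        print(result.text)


    asyncio.run(main())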
--- /dev/null
+++ together-1.5.21/src/together/resources/audio/__init__.py
@@ -0,0 +1,42 @@
+from functools import cached_property
+
+from together.resources.audio.speech import AsyncSpeech, Speech
+from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
+from together.resources.audio.translations import AsyncTranslations, Translations
+from together.types import (
+    TogetherClient,
+)
+
+
+class Audio:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    @cached_property
+    def speech(self) -> Speech:
+        return Speech(self._client)
+
+    @cached_property
+    def transcriptions(self) -> Transcriptions:
+        return Transcriptions(self._client)
+
+    @cached_property
+    def translations(self) -> Translations:
+        return Translations(self._client)
+
+
+class AsyncAudio:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    @cached_property
+    def speech(self) -> AsyncSpeech:
+        return AsyncSpeech(self._client)
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptions:
+        return AsyncTranscriptions(self._client)
+
+    @cached_property
+    def translations(self) -> AsyncTranslations:
+        return AsyncTranslations(self._client)
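Both containers build their sub-resources through functools.cached_property, so each of Speech, Transcriptions, and Translations is constructed lazily on first access and then memoized on the instance. A minimal sketch of that pattern in isolation (the Container/child names are invented for illustration):

    from functools import cached_property


    class Container:
        @cached_property
        def child(self) -> list:
            print("constructed")  # runs only on the first access
            return []


    c = Container()
    assert c.child is c.child  # same memoized instance; prints "constructed" once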
--- /dev/null
+++ together-1.5.21/src/together/resources/audio/transcriptions.py
@@ -0,0 +1,266 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, Optional, Tuple, Union
+
+from together.abstract import api_requestor
+from together.types import (
+    AudioTimestampGranularities,
+    AudioTranscriptionResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTranscriptionVerboseResponse,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class Transcriptions:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+            file: The audio file object (not file name) to transcribe, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Supplying the input language in
+                ISO-639-1 format will improve accuracy and latency.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should match the audio language.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                transcription. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The transcribed text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format.value
+                    if hasattr(response_format, "value")
+                    else response_format
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities.value
+                if hasattr(timestamp_granularities, "value")
+                else timestamp_granularities
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/transcriptions",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranscriptionVerboseResponse(**response.data)
+        else:
+            return AudioTranscriptionResponse(**response.data)
+
+
+class AsyncTranscriptions:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
+        """
+        Async version of transcribe audio into the input language.
+
+        Args:
+            file: The audio file object (not file name) to transcribe, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Supplying the input language in
+                ISO-639-1 format will improve accuracy and latency.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should match the audio language.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                transcription. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The transcribed text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/transcriptions",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranscriptionVerboseResponse(**response.data)
+        else:
+            return AudioTranscriptionResponse(**response.data)
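A usage sketch for the new endpoint with the synchronous client, following the types added in this release; the audio file name is hypothetical:

    from together import Together

    client = Together()  # reads TOGETHER_API_KEY from the environment

    # Default "json" format returns AudioTranscriptionResponse with a .text field.
    result = client.audio.transcriptions.create(file="sample.mp3")
    print(result.text)

    # "verbose_json" returns AudioTranscriptionVerboseResponse with language,
    # duration, and (optionally) per-segment or per-word timestamps.
    verbose = client.audio.transcriptions.create(
        file="sample.mp3",
        response_format="verbose_json",
        timestamp_granularities="segment",
    )
    for segment in verbose.segments or []:
        print(f"[{segment.start:.2f}-{segment.end:.2f}] {segment.text}")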
--- /dev/null
+++ together-1.5.21/src/together/resources/audio/translations.py
@@ -0,0 +1,276 @@
+from __future__ import annotations
+
+from typing import Any, Union, BinaryIO, Optional, Dict, Tuple
+from pathlib import Path
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    AudioTranslationRequest,
+    AudioTranslationResponse,
+    AudioTranslationVerboseResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTimestampGranularities,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class Translations:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranslationResponse, AudioTranslationVerboseResponse]:
+        """
+        Translates audio into English.
+
+        Args:
+            file: The audio file object (not file name) to translate, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Optional ISO-639-1 language code.
+                If omitted, language is set to English.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should be in English.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                translation. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The translated text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/translations",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranslationVerboseResponse(**response.data)
+        else:
+            return AudioTranslationResponse(**response.data)
+
+
+class AsyncTranslations:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranslationResponse, AudioTranslationVerboseResponse]:
+        """
+        Async version of translate audio into English.
+
+        Args:
+            file: The audio file object (not file name) to translate, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Optional ISO-639-1 language code.
+                If omitted, language is set to English.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should be in English.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                translation. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The translated text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/translations",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranslationVerboseResponse(**response.data)
+        else:
+            return AudioTranslationResponse(**response.data)
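Translations mirrors the transcription interface but always produces English text, and, as the file-handling branch above shows, an http(s) URL is forwarded as a plain multipart field rather than opened locally. A short sketch; the URL and prompt are invented for illustration:

    from together import Together

    client = Together()

    translation = client.audio.translations.create(
        # Hypothetical remote file; URLs are passed through, not downloaded client-side.
        file="https://example.com/audio/interview_fr.mp3",
        prompt="News interview about local elections.",  # prompts should be in English
    )
    print(translation.text)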
--- together-1.5.20/src/together/types/__init__.py
+++ together-1.5.21/src/together/types/__init__.py
@@ -7,6 +7,14 @@ from together.types.audio_speech import (
     AudioSpeechStreamChunk,
     AudioSpeechStreamEvent,
     AudioSpeechStreamResponse,
+    AudioTranscriptionRequest,
+    AudioTranslationRequest,
+    AudioTranscriptionResponse,
+    AudioTranscriptionVerboseResponse,
+    AudioTranslationResponse,
+    AudioTranslationVerboseResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTimestampGranularities,
 )
 from together.types.chat_completions import (
     ChatCompletionChunk,
@@ -102,6 +110,14 @@ __all__ = [
     "AudioSpeechStreamChunk",
     "AudioSpeechStreamEvent",
     "AudioSpeechStreamResponse",
+    "AudioTranscriptionRequest",
+    "AudioTranslationRequest",
+    "AudioTranscriptionResponse",
+    "AudioTranscriptionVerboseResponse",
+    "AudioTranslationResponse",
+    "AudioTranslationVerboseResponse",
+    "AudioTranscriptionResponseFormat",
+    "AudioTimestampGranularities",
     "DedicatedEndpoint",
     "ListEndpoint",
     "Autoscaling",
--- together-1.5.20/src/together/types/audio_speech.py
+++ together-1.5.21/src/together/types/audio_speech.py
@@ -1,13 +1,12 @@
 from __future__ import annotations
 
+import base64
 from enum import Enum
-from typing import Iterator
-import threading
+from typing import BinaryIO, Iterator, List, Optional, Union
 
 from pydantic import BaseModel, ConfigDict
 
 from together.together_response import TogetherResponse
-import base64
 
 
 class AudioResponseFormat(str, Enum):
@@ -79,23 +78,19 @@ class AudioSpeechStreamEventResponse(BaseModel):
 
 
 class AudioSpeechStreamResponse(BaseModel):
-
     response: TogetherResponse | Iterator[TogetherResponse]
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def stream_to_file(self, file_path: str) -> None:
-
         if isinstance(self.response, TogetherResponse):
             # save response to file
             with open(file_path, "wb") as f:
                 f.write(self.response.data)
 
         elif isinstance(self.response, Iterator):
-
             with open(file_path, "wb") as f:
                 for chunk in self.response:
-
                     # Try to parse as stream chunk
                     stream_event_response = AudioSpeechStreamEventResponse(
                         response={"data": chunk.data}
@@ -108,3 +103,83 @@ class AudioSpeechStreamResponse(BaseModel):
                     audio = base64.b64decode(stream_event_response.response.data.b64)
 
                     f.write(audio)
+
+
+class AudioTranscriptionResponseFormat(str, Enum):
+    JSON = "json"
+    VERBOSE_JSON = "verbose_json"
+
+
+class AudioTimestampGranularities(str, Enum):
+    SEGMENT = "segment"
+    WORD = "word"
+
+
+class AudioTranscriptionRequest(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    file: Union[str, BinaryIO]
+    model: str = "openai/whisper-large-v3"
+    language: Optional[str] = None
+    prompt: Optional[str] = None
+    response_format: AudioTranscriptionResponseFormat = (
+        AudioTranscriptionResponseFormat.JSON
+    )
+    temperature: float = 0.0
+    timestamp_granularities: Optional[AudioTimestampGranularities] = (
+        AudioTimestampGranularities.SEGMENT
+    )
+
+
+class AudioTranslationRequest(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    file: Union[str, BinaryIO]
+    model: str = "openai/whisper-large-v3"
+    language: Optional[str] = None
+    prompt: Optional[str] = None
+    response_format: AudioTranscriptionResponseFormat = (
+        AudioTranscriptionResponseFormat.JSON
+    )
+    temperature: float = 0.0
+    timestamp_granularities: Optional[AudioTimestampGranularities] = (
+        AudioTimestampGranularities.SEGMENT
+    )
+
+
+class AudioTranscriptionSegment(BaseModel):
+    id: int
+    start: float
+    end: float
+    text: str
+
+
+class AudioTranscriptionWord(BaseModel):
+    word: str
+    start: float
+    end: float
+
+
+class AudioTranscriptionResponse(BaseModel):
+    text: str
+
+
+class AudioTranscriptionVerboseResponse(BaseModel):
+    language: Optional[str] = None
+    duration: Optional[float] = None
+    text: str
+    segments: Optional[List[AudioTranscriptionSegment]] = None
+    words: Optional[List[AudioTranscriptionWord]] = None
+
+
+class AudioTranslationResponse(BaseModel):
+    text: str
+
+
+class AudioTranslationVerboseResponse(BaseModel):
+    task: Optional[str] = None
+    language: Optional[str] = None
+    duration: Optional[float] = None
+    text: str
+    segments: Optional[List[AudioTranscriptionSegment]] = None
+    words: Optional[List[AudioTranscriptionWord]] = None
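These pydantic models, re-exported from together.types by the __init__.py hunk above, validate the JSON payloads the new endpoints return. A small sketch with a hand-written payload; the field values are invented for illustration:

    from together.types import AudioTranscriptionVerboseResponse

    payload = {
        "language": "en",
        "duration": 3.2,
        "text": "hello world",
        "segments": [{"id": 0, "start": 0.0, "end": 3.2, "text": "hello world"}],
    }
    resp = AudioTranscriptionVerboseResponse(**payload)
    assert resp.segments is not None and resp.segments[0].end == 3.2
    assert resp.words is None  # words appear only with word-level granularity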
--- together-1.5.20/src/together/resources/audio/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from functools import cached_property
-
-from together.resources.audio.speech import AsyncSpeech, Speech
-from together.types import (
-    TogetherClient,
-)
-
-
-class Audio:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    @cached_property
-    def speech(self) -> Speech:
-        return Speech(self._client)
-
-
-class AsyncAudio:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    @cached_property
-    def speech(self) -> AsyncSpeech:
-        return AsyncSpeech(self._client)