together-1.5.20-py3-none-any.whl → together-1.5.23-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/cli/api/evaluation.py +379 -0
- together/cli/api/finetune.py +0 -14
- together/cli/cli.py +2 -0
- together/client.py +6 -0
- together/filemanager.py +2 -4
- together/legacy/finetune.py +2 -2
- together/resources/__init__.py +3 -0
- together/resources/audio/__init__.py +18 -0
- together/resources/audio/transcriptions.py +266 -0
- together/resources/audio/translations.py +276 -0
- together/resources/batch.py +0 -1
- together/resources/evaluation.py +724 -0
- together/resources/finetune.py +13 -26
- together/types/__init__.py +40 -0
- together/types/audio_speech.py +82 -7
- together/types/evaluation.py +87 -0
- together/types/files.py +2 -0
- together/types/finetune.py +1 -1
- together/utils/files.py +178 -73
- {together-1.5.20.dist-info → together-1.5.23.dist-info}/METADATA +28 -1
- {together-1.5.20.dist-info → together-1.5.23.dist-info}/RECORD +24 -19
- {together-1.5.20.dist-info → together-1.5.23.dist-info}/LICENSE +0 -0
- {together-1.5.20.dist-info → together-1.5.23.dist-info}/WHEEL +0 -0
- {together-1.5.20.dist-info → together-1.5.23.dist-info}/entry_points.txt +0 -0
together/resources/audio/transcriptions.py

@@ -0,0 +1,266 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, Optional, Tuple, Union
+
+from together.abstract import api_requestor
+from together.types import (
+    AudioTimestampGranularities,
+    AudioTranscriptionResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTranscriptionVerboseResponse,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class Transcriptions:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+            file: The audio file object (not file name) to transcribe, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Supplying the input language in
+                ISO-639-1 format will improve accuracy and latency.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should match the audio language.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                transcription. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The transcribed text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format.value
+                    if hasattr(response_format, "value")
+                    else response_format
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities.value
+                if hasattr(timestamp_granularities, "value")
+                else timestamp_granularities
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/transcriptions",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranscriptionVerboseResponse(**response.data)
+        else:
+            return AudioTranscriptionResponse(**response.data)
+
+
+class AsyncTranscriptions:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
+        """
+        Async version of transcribe audio into the input language.
+
+        Args:
+            file: The audio file object (not file name) to transcribe, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Supplying the input language in
+                ISO-639-1 format will improve accuracy and latency.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should match the audio language.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                transcription. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The transcribed text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/transcriptions",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranscriptionVerboseResponse(**response.data)
+        else:
+            return AudioTranscriptionResponse(**response.data)
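For orientation, a minimal usage sketch of the new transcription resource (not part of the diff). It assumes the resource is mounted as client.audio.transcriptions, which the together/resources/audio/__init__.py and together/client.py additions above suggest, and that the default json response exposes a text field; the file name is hypothetical. An AsyncTranscriptions variant with the same signature exists for await-based code.

from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

# `file` accepts a local path (str/Path), an open binary file object,
# or an http(s) URL string, per the branches in create() above.
transcript = client.audio.transcriptions.create(
    file="meeting.mp3",  # hypothetical local file
    model="openai/whisper-large-v3",
    language="en",  # optional ISO-639-1 hint
)
print(transcript.text)  # `text` field assumed from the JSON response schema
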
together/resources/audio/translations.py

@@ -0,0 +1,276 @@
+from __future__ import annotations
+
+from typing import Any, Union, BinaryIO, Optional, Dict, Tuple
+from pathlib import Path
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    AudioTranslationRequest,
+    AudioTranslationResponse,
+    AudioTranslationVerboseResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTimestampGranularities,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class Translations:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranslationResponse, AudioTranslationVerboseResponse]:
+        """
+        Translates audio into English.
+
+        Args:
+            file: The audio file object (not file name) to translate, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Optional ISO-639-1 language code.
+                If omitted, language is set to English.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should be in English.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                translation. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The translated text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/translations",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranslationVerboseResponse(**response.data)
+        else:
+            return AudioTranslationResponse(**response.data)
+
+
+class AsyncTranslations:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranslationResponse, AudioTranslationVerboseResponse]:
+        """
+        Async version of translate audio into English.
+
+        Args:
+            file: The audio file object (not file name) to translate, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Optional ISO-639-1 language code.
+                If omitted, language is set to English.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should be in English.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                translation. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The translated text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/translations",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranslationVerboseResponse(**response.data)
+        else:
+            return AudioTranslationResponse(**response.data)
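And a matching sketch for the translation resource, again assuming a client.audio.translations mount; the URL is illustrative. Note that a URL string is sent as a plain multipart field, (None, file), rather than opened and uploaded, so the server fetches the audio itself.

from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

result = client.audio.translations.create(
    file="https://example.com/interview.ogg",  # illustrative URL
    response_format="verbose_json",
    timestamp_granularities="segment",  # only honored with verbose_json
)
# verbose_json maps to AudioTranslationVerboseResponse, which carries
# segment-level timestamps alongside the English translation.
print(result.text)  # `text` field assumed, as above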