together 1.5.19.tar.gz → 1.5.21.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {together-1.5.19 → together-1.5.21}/PKG-INFO +1 -1
- {together-1.5.19 → together-1.5.21}/pyproject.toml +1 -1
- {together-1.5.19 → together-1.5.21}/src/together/client.py +2 -0
- together-1.5.21/src/together/resources/audio/__init__.py +42 -0
- together-1.5.21/src/together/resources/audio/transcriptions.py +266 -0
- together-1.5.21/src/together/resources/audio/translations.py +276 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/__init__.py +16 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/audio_speech.py +82 -7
- {together-1.5.19 → together-1.5.21}/src/together/types/models.py +1 -0
- together-1.5.19/src/together/resources/audio/__init__.py +0 -24
- {together-1.5.19 → together-1.5.21}/LICENSE +0 -0
- {together-1.5.19 → together-1.5.21}/README.md +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/abstract/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/abstract/api_requestor.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/chat.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/completions.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/endpoints.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/files.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/finetune.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/images.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/models.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/api/utils.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/cli/cli.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/constants.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/error.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/filemanager.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/base.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/complete.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/embeddings.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/files.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/finetune.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/images.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/legacy/models.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/audio/speech.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/batch.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/chat/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/chat/completions.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/code_interpreter.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/completions.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/embeddings.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/endpoints.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/files.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/finetune.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/images.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/models.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/resources/rerank.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/together_response.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/abstract.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/batch.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/chat_completions.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/code_interpreter.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/common.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/completions.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/embeddings.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/endpoints.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/error.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/files.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/finetune.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/images.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/types/rerank.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/utils/__init__.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/utils/_log.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/utils/api_helpers.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/utils/files.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/utils/tools.py +0 -0
- {together-1.5.19 → together-1.5.21}/src/together/version.py +0 -0
{together-1.5.19 → together-1.5.21}/pyproject.toml

```diff
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.19"
+version = "1.5.21"
 authors = ["Together AI <support@together.ai>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
```
{together-1.5.19 → together-1.5.21}/src/together/client.py

```diff
@@ -103,6 +103,7 @@ class AsyncTogether:
     models: resources.AsyncModels
     fine_tuning: resources.AsyncFineTuning
     rerank: resources.AsyncRerank
+    audio: resources.AsyncAudio
     code_interpreter: CodeInterpreter
     batches: resources.AsyncBatches
     # client options
@@ -167,6 +168,7 @@ class AsyncTogether:
         self.models = resources.AsyncModels(self.client)
         self.fine_tuning = resources.AsyncFineTuning(self.client)
         self.rerank = resources.AsyncRerank(self.client)
+        self.audio = resources.AsyncAudio(self.client)
         self.code_interpreter = CodeInterpreter(self.client)
         self.batches = resources.AsyncBatches(self.client)
 
```
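As a point of reference, here is a minimal sketch of what the new attribute enables, assuming a valid `TOGETHER_API_KEY` in the environment and a hypothetical local file `sample.wav` (neither is part of the diff):

```python
import asyncio

from together import AsyncTogether


async def main() -> None:
    # AsyncTogether now wires up resources.AsyncAudio (see the hunks above),
    # so transcription is reachable as client.audio.transcriptions.
    client = AsyncTogether()  # picks up TOGETHER_API_KEY from the environment
    result = await client.audio.transcriptions.create(
        file="sample.wav",  # hypothetical local file
        model="openai/whisper-large-v3",
    )
    print(result.text)


asyncio.run(main())
```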
together-1.5.21/src/together/resources/audio/__init__.py (new in 1.5.21)

```diff
@@ -0,0 +1,42 @@
+from functools import cached_property
+
+from together.resources.audio.speech import AsyncSpeech, Speech
+from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
+from together.resources.audio.translations import AsyncTranslations, Translations
+from together.types import (
+    TogetherClient,
+)
+
+
+class Audio:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    @cached_property
+    def speech(self) -> Speech:
+        return Speech(self._client)
+
+    @cached_property
+    def transcriptions(self) -> Transcriptions:
+        return Transcriptions(self._client)
+
+    @cached_property
+    def translations(self) -> Translations:
+        return Translations(self._client)
+
+
+class AsyncAudio:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    @cached_property
+    def speech(self) -> AsyncSpeech:
+        return AsyncSpeech(self._client)
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptions:
+        return AsyncTranscriptions(self._client)
+
+    @cached_property
+    def translations(self) -> AsyncTranslations:
+        return AsyncTranslations(self._client)
```
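Each accessor above is a `functools.cached_property`, so a sub-resource is constructed on first attribute access and then reused for the lifetime of the `Audio`/`AsyncAudio` instance. A small sketch of that behavior, assuming the synchronous `Together` client exposes `.audio` as in earlier releases:

```python
from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set

# cached_property builds Transcriptions once, then returns the same object:
t1 = client.audio.transcriptions
t2 = client.audio.transcriptions
assert t1 is t2
```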
together-1.5.21/src/together/resources/audio/transcriptions.py (new in 1.5.21)

```diff
@@ -0,0 +1,266 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, Optional, Tuple, Union
+
+from together.abstract import api_requestor
+from together.types import (
+    AudioTimestampGranularities,
+    AudioTranscriptionResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTranscriptionVerboseResponse,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class Transcriptions:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+            file: The audio file object (not file name) to transcribe, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Supplying the input language in
+                ISO-639-1 format will improve accuracy and latency.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should match the audio language.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                transcription. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The transcribed text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format.value
+                    if hasattr(response_format, "value")
+                    else response_format
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities.value
+                if hasattr(timestamp_granularities, "value")
+                else timestamp_granularities
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/transcriptions",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranscriptionVerboseResponse(**response.data)
+        else:
+            return AudioTranscriptionResponse(**response.data)
+
+
+class AsyncTranscriptions:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
+        """
+        Async version of transcribe audio into the input language.
+
+        Args:
+            file: The audio file object (not file name) to transcribe, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Supplying the input language in
+                ISO-639-1 format will improve accuracy and latency.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should match the audio language.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                transcription. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The transcribed text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/transcriptions",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranscriptionVerboseResponse(**response.data)
+        else:
+            return AudioTranscriptionResponse(**response.data)
```
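A hedged usage sketch for the resource above; the audio URL is a placeholder. Per the docstring, `response_format` must be `verbose_json` for timestamp granularities to apply, and the verbose branch parses into `AudioTranscriptionVerboseResponse`:

```python
from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set

transcript = client.audio.transcriptions.create(
    file="https://example.com/speech.mp3",  # hypothetical URL; sent as a multipart field
    language="en",                          # ISO-639-1 hint for accuracy and latency
    response_format="verbose_json",
    timestamp_granularities="word",
)

print(transcript.text)
if transcript.words:  # populated only for verbose_json with word granularity
    w = transcript.words[0]
    print(w.word, w.start, w.end)
```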
together-1.5.21/src/together/resources/audio/translations.py (new in 1.5.21)

```diff
@@ -0,0 +1,276 @@
+from __future__ import annotations
+
+from typing import Any, Union, BinaryIO, Optional, Dict, Tuple
+from pathlib import Path
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    AudioTranslationRequest,
+    AudioTranslationResponse,
+    AudioTranslationVerboseResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTimestampGranularities,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class Translations:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranslationResponse, AudioTranslationVerboseResponse]:
+        """
+        Translates audio into English.
+
+        Args:
+            file: The audio file object (not file name) to translate, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Optional ISO-639-1 language code.
+                If omitted, language is set to English.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should be in English.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                translation. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The translated text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/translations",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranslationVerboseResponse(**response.data)
+        else:
+            return AudioTranslationResponse(**response.data)
+
+
+class AsyncTranslations:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def create(
+        self,
+        *,
+        file: Union[str, BinaryIO, Path],
+        model: str = "openai/whisper-large-v3",
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
+        temperature: float = 0.0,
+        timestamp_granularities: Optional[
+            Union[str, AudioTimestampGranularities]
+        ] = None,
+        **kwargs: Any,
+    ) -> Union[AudioTranslationResponse, AudioTranslationVerboseResponse]:
+        """
+        Async version of translate audio into English.
+
+        Args:
+            file: The audio file object (not file name) to translate, in one of these formats:
+                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+                Can be a file path (str/Path), file object (BinaryIO), or URL (str).
+            model: ID of the model to use. Defaults to "openai/whisper-large-v3".
+            language: The language of the input audio. Optional ISO-639-1 language code.
+                If omitted, language is set to English.
+            prompt: An optional text to guide the model's style or continue a previous
+                audio segment. The prompt should be in English.
+            response_format: The format of the transcript output, in one of these options:
+                json, verbose_json.
+            temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
+                will make the output more random, while lower values like 0.2 will make it
+                more focused and deterministic.
+            timestamp_granularities: The timestamp granularities to populate for this
+                translation. response_format must be set verbose_json to use timestamp
+                granularities. Either or both of these options are supported: word, or segment.
+
+        Returns:
+            The translated text in the requested format.
+        """
+
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        # Handle file input - could be a path, URL, or file object
+        files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
+        params_data = {}
+
+        if isinstance(file, (str, Path)):
+            if isinstance(file, str) and file.startswith(("http://", "https://")):
+                # URL string - send as multipart field
+                files_data["file"] = (None, file)
+            else:
+                # Local file path
+                file_path = Path(file)
+                files_data["file"] = open(file_path, "rb")
+        else:
+            # File object
+            files_data["file"] = file
+
+        # Build request parameters
+        params_data.update(
+            {
+                "model": model,
+                "response_format": (
+                    response_format
+                    if isinstance(response_format, str)
+                    else (
+                        response_format.value
+                        if hasattr(response_format, "value")
+                        else response_format
+                    )
+                ),
+                "temperature": temperature,
+            }
+        )
+
+        if language is not None:
+            params_data["language"] = language
+
+        if prompt is not None:
+            params_data["prompt"] = prompt
+
+        if timestamp_granularities is not None:
+            params_data["timestamp_granularities"] = (
+                timestamp_granularities
+                if isinstance(timestamp_granularities, str)
+                else (
+                    timestamp_granularities.value
+                    if hasattr(timestamp_granularities, "value")
+                    else timestamp_granularities
+                )
+            )
+
+        # Add any additional kwargs
+        params_data.update(kwargs)
+
+        try:
+            response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="POST",
+                    url="audio/translations",
+                    params=params_data,
+                    files=files_data,
+                ),
+            )
+        finally:
+            # Close file if we opened it
+            if files_data and "file" in files_data:
+                try:
+                    # Only close if it's a file object (not a tuple for URL)
+                    file_obj = files_data["file"]
+                    if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
+                        file_obj.close()
+                except:
+                    pass
+
+        # Parse response based on format
+        if (
+            response_format == "verbose_json"
+            or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+        ):
+            return AudioTranslationVerboseResponse(**response.data)
+        else:
+            return AudioTranslationResponse(**response.data)
```
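And a corresponding async sketch for translations, with a hypothetical French recording. Translations always target English; `language` describes the input audio, and the prompt should be in English:

```python
import asyncio

from together import AsyncTogether


async def translate() -> None:
    client = AsyncTogether()  # assumes TOGETHER_API_KEY is set
    result = await client.audio.translations.create(
        file="interview_fr.mp3",  # hypothetical input file
        language="fr",            # language of the source audio
        prompt="Transcript of a technology interview.",
    )
    print(result.text)  # English translation


asyncio.run(translate())
```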
{together-1.5.19 → together-1.5.21}/src/together/types/__init__.py

```diff
@@ -7,6 +7,14 @@ from together.types.audio_speech import (
     AudioSpeechStreamChunk,
     AudioSpeechStreamEvent,
     AudioSpeechStreamResponse,
+    AudioTranscriptionRequest,
+    AudioTranslationRequest,
+    AudioTranscriptionResponse,
+    AudioTranscriptionVerboseResponse,
+    AudioTranslationResponse,
+    AudioTranslationVerboseResponse,
+    AudioTranscriptionResponseFormat,
+    AudioTimestampGranularities,
 )
 from together.types.chat_completions import (
     ChatCompletionChunk,
@@ -102,6 +110,14 @@ __all__ = [
     "AudioSpeechStreamChunk",
     "AudioSpeechStreamEvent",
     "AudioSpeechStreamResponse",
+    "AudioTranscriptionRequest",
+    "AudioTranslationRequest",
+    "AudioTranscriptionResponse",
+    "AudioTranscriptionVerboseResponse",
+    "AudioTranslationResponse",
+    "AudioTranslationVerboseResponse",
+    "AudioTranscriptionResponseFormat",
+    "AudioTimestampGranularities",
     "DedicatedEndpoint",
     "ListEndpoint",
     "Autoscaling",
```
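With these re-exports in place, the new audio types can be imported straight from `together.types`; the enum members can stand in wherever the resource methods accept plain strings:

```python
from together.types import (
    AudioTimestampGranularities,
    AudioTranscriptionResponseFormat,
)

fmt = AudioTranscriptionResponseFormat.VERBOSE_JSON
gran = AudioTimestampGranularities.WORD
print(fmt.value, gran.value)  # verbose_json word
```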
{together-1.5.19 → together-1.5.21}/src/together/types/audio_speech.py

```diff
@@ -1,13 +1,12 @@
 from __future__ import annotations
 
+import base64
 from enum import Enum
-from typing import Iterator
-import threading
+from typing import BinaryIO, Iterator, List, Optional, Union
 
 from pydantic import BaseModel, ConfigDict
 
 from together.together_response import TogetherResponse
-import base64
 
 
 class AudioResponseFormat(str, Enum):
@@ -79,23 +78,19 @@ class AudioSpeechStreamEventResponse(BaseModel):
 
 
 class AudioSpeechStreamResponse(BaseModel):
-
     response: TogetherResponse | Iterator[TogetherResponse]
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def stream_to_file(self, file_path: str) -> None:
-
         if isinstance(self.response, TogetherResponse):
             # save response to file
             with open(file_path, "wb") as f:
                 f.write(self.response.data)
 
         elif isinstance(self.response, Iterator):
-
             with open(file_path, "wb") as f:
                 for chunk in self.response:
-
                     # Try to parse as stream chunk
                     stream_event_response = AudioSpeechStreamEventResponse(
                         response={"data": chunk.data}
@@ -108,3 +103,83 @@ class AudioSpeechStreamResponse(BaseModel):
                     audio = base64.b64decode(stream_event_response.response.data.b64)
 
                     f.write(audio)
+
+
+class AudioTranscriptionResponseFormat(str, Enum):
+    JSON = "json"
+    VERBOSE_JSON = "verbose_json"
+
+
+class AudioTimestampGranularities(str, Enum):
+    SEGMENT = "segment"
+    WORD = "word"
+
+
+class AudioTranscriptionRequest(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    file: Union[str, BinaryIO]
+    model: str = "openai/whisper-large-v3"
+    language: Optional[str] = None
+    prompt: Optional[str] = None
+    response_format: AudioTranscriptionResponseFormat = (
+        AudioTranscriptionResponseFormat.JSON
+    )
+    temperature: float = 0.0
+    timestamp_granularities: Optional[AudioTimestampGranularities] = (
+        AudioTimestampGranularities.SEGMENT
+    )
+
+
+class AudioTranslationRequest(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    file: Union[str, BinaryIO]
+    model: str = "openai/whisper-large-v3"
+    language: Optional[str] = None
+    prompt: Optional[str] = None
+    response_format: AudioTranscriptionResponseFormat = (
+        AudioTranscriptionResponseFormat.JSON
+    )
+    temperature: float = 0.0
+    timestamp_granularities: Optional[AudioTimestampGranularities] = (
+        AudioTimestampGranularities.SEGMENT
+    )
+
+
+class AudioTranscriptionSegment(BaseModel):
+    id: int
+    start: float
+    end: float
+    text: str
+
+
+class AudioTranscriptionWord(BaseModel):
+    word: str
+    start: float
+    end: float
+
+
+class AudioTranscriptionResponse(BaseModel):
+    text: str
+
+
+class AudioTranscriptionVerboseResponse(BaseModel):
+    language: Optional[str] = None
+    duration: Optional[float] = None
+    text: str
+    segments: Optional[List[AudioTranscriptionSegment]] = None
+    words: Optional[List[AudioTranscriptionWord]] = None
+
+
+class AudioTranslationResponse(BaseModel):
+    text: str
+
+
+class AudioTranslationVerboseResponse(BaseModel):
+    task: Optional[str] = None
+    language: Optional[str] = None
+    duration: Optional[float] = None
+    text: str
+    segments: Optional[List[AudioTranscriptionSegment]] = None
+    words: Optional[List[AudioTranscriptionWord]] = None
```
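Since the new response models are ordinary pydantic `BaseModel`s, a verbose payload validates directly into nested segment and word models; a small illustration with made-up data:

```python
from together.types.audio_speech import AudioTranscriptionVerboseResponse

payload = {  # fabricated example data, matching the field definitions above
    "language": "en",
    "duration": 3.2,
    "text": "hello world",
    "segments": [{"id": 0, "start": 0.0, "end": 3.2, "text": "hello world"}],
    "words": [
        {"word": "hello", "start": 0.0, "end": 1.4},
        {"word": "world", "start": 1.5, "end": 3.2},
    ],
}

resp = AudioTranscriptionVerboseResponse(**payload)
print(resp.segments[0].text, resp.words[1].word)
```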
together-1.5.19/src/together/resources/audio/__init__.py (removed; superseded by the 1.5.21 version above)

```diff
@@ -1,24 +0,0 @@
-from functools import cached_property
-
-from together.resources.audio.speech import AsyncSpeech, Speech
-from together.types import (
-    TogetherClient,
-)
-
-
-class Audio:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    @cached_property
-    def speech(self) -> Speech:
-        return Speech(self._client)
-
-
-class AsyncAudio:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    @cached_property
-    def speech(self) -> AsyncSpeech:
-        return AsyncSpeech(self._client)
```
All remaining files (listed above with +0 -0) are unchanged between together-1.5.19 and together-1.5.21.