together 1.5.29__py3-none-any.whl → 1.5.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/abstract/api_requestor.py +44 -3
- together/cli/api/chat.py +24 -2
- together/cli/api/endpoints.py +56 -6
- together/constants.py +3 -3
- together/filemanager.py +45 -22
- together/resources/audio/__init__.py +9 -0
- together/resources/audio/speech.py +8 -2
- together/resources/audio/transcriptions.py +20 -2
- together/resources/audio/voices.py +65 -0
- together/resources/endpoints.py +98 -7
- together/types/__init__.py +4 -0
- together/types/audio_speech.py +127 -14
- together/types/chat_completions.py +6 -0
- together/types/common.py +1 -0
- together/types/files.py +1 -0
- together/utils/files.py +183 -54
- {together-1.5.29.dist-info → together-1.5.31.dist-info}/METADATA +2 -1
- {together-1.5.29.dist-info → together-1.5.31.dist-info}/RECORD +21 -20
- {together-1.5.29.dist-info → together-1.5.31.dist-info}/WHEEL +0 -0
- {together-1.5.29.dist-info → together-1.5.31.dist-info}/entry_points.txt +0 -0
- {together-1.5.29.dist-info → together-1.5.31.dist-info}/licenses/LICENSE +0 -0
|
@@ -619,14 +619,29 @@ class APIRequestor:
|
|
|
619
619
|
) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
|
|
620
620
|
"""Returns the response(s) and a bool indicating whether it is a stream."""
|
|
621
621
|
content_type = result.headers.get("Content-Type", "")
|
|
622
|
+
|
|
622
623
|
if stream and "text/event-stream" in content_type:
|
|
624
|
+
# SSE format streaming
|
|
623
625
|
return (
|
|
624
626
|
self._interpret_response_line(
|
|
625
627
|
line, result.status_code, result.headers, stream=True
|
|
626
628
|
)
|
|
627
629
|
for line in parse_stream(result.iter_lines())
|
|
628
630
|
), True
|
|
631
|
+
elif stream and content_type in [
|
|
632
|
+
"audio/wav",
|
|
633
|
+
"audio/mpeg",
|
|
634
|
+
"application/octet-stream",
|
|
635
|
+
]:
|
|
636
|
+
# Binary audio streaming - return chunks as binary data
|
|
637
|
+
def binary_stream_generator() -> Iterator[TogetherResponse]:
|
|
638
|
+
for chunk in result.iter_content(chunk_size=8192):
|
|
639
|
+
if chunk: # Skip empty chunks
|
|
640
|
+
yield TogetherResponse(chunk, dict(result.headers))
|
|
641
|
+
|
|
642
|
+
return binary_stream_generator(), True
|
|
629
643
|
else:
|
|
644
|
+
# Non-streaming response
|
|
630
645
|
if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
|
|
631
646
|
content = result.content
|
|
632
647
|
else:
|
|
@@ -648,23 +663,49 @@ class APIRequestor:
|
|
|
648
663
|
| tuple[TogetherResponse, bool]
|
|
649
664
|
):
|
|
650
665
|
"""Returns the response(s) and a bool indicating whether it is a stream."""
|
|
651
|
-
|
|
666
|
+
content_type = result.headers.get("Content-Type", "")
|
|
667
|
+
|
|
668
|
+
if stream and "text/event-stream" in content_type:
|
|
669
|
+
# SSE format streaming
|
|
652
670
|
return (
|
|
653
671
|
self._interpret_response_line(
|
|
654
672
|
line, result.status, result.headers, stream=True
|
|
655
673
|
)
|
|
656
674
|
async for line in parse_stream_async(result.content)
|
|
657
675
|
), True
|
|
676
|
+
elif stream and content_type in [
|
|
677
|
+
"audio/wav",
|
|
678
|
+
"audio/mpeg",
|
|
679
|
+
"application/octet-stream",
|
|
680
|
+
]:
|
|
681
|
+
# Binary audio streaming - return chunks as binary data
|
|
682
|
+
async def binary_stream_generator() -> (
|
|
683
|
+
AsyncGenerator[TogetherResponse, None]
|
|
684
|
+
):
|
|
685
|
+
async for chunk in result.content.iter_chunked(8192):
|
|
686
|
+
if chunk: # Skip empty chunks
|
|
687
|
+
yield TogetherResponse(chunk, dict(result.headers))
|
|
688
|
+
|
|
689
|
+
return binary_stream_generator(), True
|
|
658
690
|
else:
|
|
691
|
+
# Non-streaming response
|
|
659
692
|
try:
|
|
660
|
-
await result.read()
|
|
693
|
+
content = await result.read()
|
|
661
694
|
except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
|
|
662
695
|
raise error.Timeout("Request timed out") from e
|
|
663
696
|
except aiohttp.ClientError as e:
|
|
664
697
|
utils.log_warn(e, body=result.content)
|
|
698
|
+
|
|
699
|
+
if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
|
|
700
|
+
# Binary content - keep as bytes
|
|
701
|
+
response_content: str | bytes = content
|
|
702
|
+
else:
|
|
703
|
+
# Text content - decode to string
|
|
704
|
+
response_content = content.decode("utf-8")
|
|
705
|
+
|
|
665
706
|
return (
|
|
666
707
|
self._interpret_response_line(
|
|
667
|
-
|
|
708
|
+
response_content,
|
|
668
709
|
result.status,
|
|
669
710
|
result.headers,
|
|
670
711
|
stream=False,
|
together/cli/api/chat.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import cmd
|
|
4
4
|
import json
|
|
5
|
-
from typing import List, Tuple
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
|
6
6
|
|
|
7
7
|
import click
|
|
8
8
|
|
|
@@ -181,6 +181,12 @@ def interactive(
|
|
|
181
181
|
"--frequency-penalty", type=float, help="Frequency penalty sampling method"
|
|
182
182
|
)
|
|
183
183
|
@click.option("--min-p", type=float, help="Min p sampling")
|
|
184
|
+
@click.option(
|
|
185
|
+
"--audio-url",
|
|
186
|
+
type=str,
|
|
187
|
+
multiple=True,
|
|
188
|
+
help="Audio URL to attach to the last user message",
|
|
189
|
+
)
|
|
184
190
|
@click.option("--no-stream", is_flag=True, help="Disable streaming")
|
|
185
191
|
@click.option("--logprobs", type=int, help="Return logprobs. Only works with --raw.")
|
|
186
192
|
@click.option("--echo", is_flag=True, help="Echo prompt. Only works with --raw.")
|
|
@@ -200,6 +206,7 @@ def chat(
|
|
|
200
206
|
presence_penalty: float | None = None,
|
|
201
207
|
frequency_penalty: float | None = None,
|
|
202
208
|
min_p: float | None = None,
|
|
209
|
+
audio_url: List[str] | None = None,
|
|
203
210
|
no_stream: bool = False,
|
|
204
211
|
logprobs: int | None = None,
|
|
205
212
|
echo: bool | None = None,
|
|
@@ -210,7 +217,22 @@ def chat(
|
|
|
210
217
|
"""Generate chat completions from messages"""
|
|
211
218
|
client: Together = ctx.obj
|
|
212
219
|
|
|
213
|
-
messages
|
|
220
|
+
messages: List[Dict[str, Any]] = [
|
|
221
|
+
{"role": msg[0], "content": msg[1]} for msg in message
|
|
222
|
+
]
|
|
223
|
+
|
|
224
|
+
if audio_url and messages:
|
|
225
|
+
last_msg = messages[-1]
|
|
226
|
+
if last_msg["role"] == "user":
|
|
227
|
+
# Convert content to list if it is string
|
|
228
|
+
if isinstance(last_msg["content"], str):
|
|
229
|
+
last_msg["content"] = [{"type": "text", "text": last_msg["content"]}]
|
|
230
|
+
|
|
231
|
+
# Append audio URLs
|
|
232
|
+
for url in audio_url:
|
|
233
|
+
last_msg["content"].append(
|
|
234
|
+
{"type": "audio_url", "audio_url": {"url": url}}
|
|
235
|
+
)
|
|
214
236
|
|
|
215
237
|
response = client.chat.completions.create(
|
|
216
238
|
model=model,
|
together/cli/api/endpoints.py
CHANGED
|
@@ -133,8 +133,11 @@ def endpoints(ctx: click.Context) -> None:
|
|
|
133
133
|
help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
|
|
134
134
|
)
|
|
135
135
|
@click.option(
|
|
136
|
-
"--
|
|
137
|
-
|
|
136
|
+
"--availability-zone",
|
|
137
|
+
help="Start endpoint in specified availability zone (e.g., us-central-4b)",
|
|
138
|
+
)
|
|
139
|
+
@click.option(
|
|
140
|
+
"--wait/--no-wait",
|
|
138
141
|
default=True,
|
|
139
142
|
help="Wait for the endpoint to be ready after creation",
|
|
140
143
|
)
|
|
@@ -152,6 +155,7 @@ def create(
|
|
|
152
155
|
no_speculative_decoding: bool,
|
|
153
156
|
no_auto_start: bool,
|
|
154
157
|
inactive_timeout: int | None,
|
|
158
|
+
availability_zone: str | None,
|
|
155
159
|
wait: bool,
|
|
156
160
|
) -> None:
|
|
157
161
|
"""Create a new dedicated inference endpoint."""
|
|
@@ -177,6 +181,7 @@ def create(
|
|
|
177
181
|
disable_speculative_decoding=no_speculative_decoding,
|
|
178
182
|
state="STOPPED" if no_auto_start else "STARTED",
|
|
179
183
|
inactive_timeout=inactive_timeout,
|
|
184
|
+
availability_zone=availability_zone,
|
|
180
185
|
)
|
|
181
186
|
except InvalidRequestError as e:
|
|
182
187
|
print_api_error(e)
|
|
@@ -203,6 +208,8 @@ def create(
|
|
|
203
208
|
click.echo(" Auto-start: disabled", err=True)
|
|
204
209
|
if inactive_timeout is not None:
|
|
205
210
|
click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
|
|
211
|
+
if availability_zone:
|
|
212
|
+
click.echo(f" Availability zone: {availability_zone}", err=True)
|
|
206
213
|
|
|
207
214
|
click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
|
|
208
215
|
|
|
@@ -276,7 +283,9 @@ def fetch_and_print_hardware_options(
|
|
|
276
283
|
@endpoints.command()
|
|
277
284
|
@click.argument("endpoint-id", required=True)
|
|
278
285
|
@click.option(
|
|
279
|
-
"--wait",
|
|
286
|
+
"--wait/--no-wait",
|
|
287
|
+
default=True,
|
|
288
|
+
help="Wait for the endpoint to stop",
|
|
280
289
|
)
|
|
281
290
|
@click.pass_obj
|
|
282
291
|
@handle_api_errors
|
|
@@ -299,7 +308,9 @@ def stop(client: Together, endpoint_id: str, wait: bool) -> None:
|
|
|
299
308
|
@endpoints.command()
|
|
300
309
|
@click.argument("endpoint-id", required=True)
|
|
301
310
|
@click.option(
|
|
302
|
-
"--wait",
|
|
311
|
+
"--wait/--no-wait",
|
|
312
|
+
default=True,
|
|
313
|
+
help="Wait for the endpoint to start",
|
|
303
314
|
)
|
|
304
315
|
@click.pass_obj
|
|
305
316
|
@handle_api_errors
|
|
@@ -337,13 +348,30 @@ def delete(client: Together, endpoint_id: str) -> None:
|
|
|
337
348
|
type=click.Choice(["dedicated", "serverless"]),
|
|
338
349
|
help="Filter by endpoint type",
|
|
339
350
|
)
|
|
351
|
+
@click.option(
|
|
352
|
+
"--mine",
|
|
353
|
+
type=click.BOOL,
|
|
354
|
+
default=None,
|
|
355
|
+
help="true (only mine), default=all",
|
|
356
|
+
)
|
|
357
|
+
@click.option(
|
|
358
|
+
"--usage-type",
|
|
359
|
+
type=click.Choice(["on-demand", "reserved"]),
|
|
360
|
+
help="Filter by endpoint usage type",
|
|
361
|
+
)
|
|
340
362
|
@click.pass_obj
|
|
341
363
|
@handle_api_errors
|
|
342
364
|
def list(
|
|
343
|
-
client: Together,
|
|
365
|
+
client: Together,
|
|
366
|
+
json: bool,
|
|
367
|
+
type: Literal["dedicated", "serverless"] | None,
|
|
368
|
+
usage_type: Literal["on-demand", "reserved"] | None,
|
|
369
|
+
mine: bool | None,
|
|
344
370
|
) -> None:
|
|
345
371
|
"""List all inference endpoints (includes both dedicated and serverless endpoints)."""
|
|
346
|
-
endpoints: List[ListEndpoint] = client.endpoints.list(
|
|
372
|
+
endpoints: List[ListEndpoint] = client.endpoints.list(
|
|
373
|
+
type=type, usage_type=usage_type, mine=mine
|
|
374
|
+
)
|
|
347
375
|
|
|
348
376
|
if not endpoints:
|
|
349
377
|
click.echo("No dedicated endpoints found", err=True)
|
|
@@ -432,3 +460,25 @@ def update(
|
|
|
432
460
|
|
|
433
461
|
click.echo("Successfully updated endpoint", err=True)
|
|
434
462
|
click.echo(endpoint_id)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
@endpoints.command()
|
|
466
|
+
@click.option("--json", is_flag=True, help="Print output in JSON format")
|
|
467
|
+
@click.pass_obj
|
|
468
|
+
@handle_api_errors
|
|
469
|
+
def availability_zones(client: Together, json: bool) -> None:
|
|
470
|
+
"""List all availability zones."""
|
|
471
|
+
avzones = client.endpoints.list_avzones()
|
|
472
|
+
|
|
473
|
+
if not avzones:
|
|
474
|
+
click.echo("No availability zones found", err=True)
|
|
475
|
+
return
|
|
476
|
+
|
|
477
|
+
if json:
|
|
478
|
+
import json as json_lib
|
|
479
|
+
|
|
480
|
+
click.echo(json_lib.dumps({"avzones": avzones}, indent=2))
|
|
481
|
+
else:
|
|
482
|
+
click.echo("Available zones:", err=True)
|
|
483
|
+
for availability_zone in sorted(avzones):
|
|
484
|
+
click.echo(f" {availability_zone}")
|
together/constants.py
CHANGED
|
@@ -20,13 +20,13 @@ MAX_CONCURRENT_PARTS = 4 # Maximum concurrent parts for multipart upload
|
|
|
20
20
|
|
|
21
21
|
# Multipart upload constants
|
|
22
22
|
MIN_PART_SIZE_MB = 5 # Minimum part size (S3 requirement)
|
|
23
|
-
TARGET_PART_SIZE_MB =
|
|
24
|
-
MAX_MULTIPART_PARTS = 250 # Maximum parts per upload
|
|
23
|
+
TARGET_PART_SIZE_MB = 250 # Target part size
|
|
24
|
+
MAX_MULTIPART_PARTS = 250 # Maximum parts per upload
|
|
25
25
|
MULTIPART_UPLOAD_TIMEOUT = 300 # Timeout in seconds for uploading each part
|
|
26
26
|
MULTIPART_THRESHOLD_GB = 5.0 # threshold for switching to multipart upload
|
|
27
27
|
|
|
28
28
|
# maximum number of GB sized files we support finetuning for
|
|
29
|
-
MAX_FILE_SIZE_GB =
|
|
29
|
+
MAX_FILE_SIZE_GB = 50.1
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
# Messages
|
together/filemanager.py
CHANGED
|
@@ -6,10 +6,10 @@ import shutil
|
|
|
6
6
|
import stat
|
|
7
7
|
import tempfile
|
|
8
8
|
import uuid
|
|
9
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
|
10
10
|
from functools import partial
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import Any, Dict, List, Tuple
|
|
12
|
+
from typing import Any, BinaryIO, Dict, List, Tuple
|
|
13
13
|
|
|
14
14
|
import requests
|
|
15
15
|
from filelock import FileLock
|
|
@@ -212,6 +212,7 @@ class DownloadManager:
|
|
|
212
212
|
),
|
|
213
213
|
remaining_retries=MAX_RETRIES,
|
|
214
214
|
stream=True,
|
|
215
|
+
request_timeout=3600,
|
|
215
216
|
)
|
|
216
217
|
|
|
217
218
|
try:
|
|
@@ -512,6 +513,18 @@ class MultipartUploadManager:
|
|
|
512
513
|
|
|
513
514
|
return response.data
|
|
514
515
|
|
|
516
|
+
def _submit_part(
|
|
517
|
+
self,
|
|
518
|
+
executor: ThreadPoolExecutor,
|
|
519
|
+
f: BinaryIO,
|
|
520
|
+
part_info: Dict[str, Any],
|
|
521
|
+
part_size: int,
|
|
522
|
+
) -> Future[str]:
|
|
523
|
+
"""Submit a single part for upload and return the future"""
|
|
524
|
+
f.seek((part_info["PartNumber"] - 1) * part_size)
|
|
525
|
+
part_data = f.read(part_size)
|
|
526
|
+
return executor.submit(self._upload_single_part, part_info, part_data)
|
|
527
|
+
|
|
515
528
|
def _upload_parts_concurrent(
|
|
516
529
|
self, file: Path, upload_info: Dict[str, Any], part_size: int
|
|
517
530
|
) -> List[Dict[str, Any]]:
|
|
@@ -522,29 +535,39 @@ class MultipartUploadManager:
|
|
|
522
535
|
|
|
523
536
|
with ThreadPoolExecutor(max_workers=self.max_concurrent_parts) as executor:
|
|
524
537
|
with tqdm(total=len(parts), desc="Uploading parts", unit="part") as pbar:
|
|
525
|
-
future_to_part = {}
|
|
526
|
-
|
|
527
538
|
with open(file, "rb") as f:
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
part_data = f.read(part_size)
|
|
539
|
+
future_to_part = {}
|
|
540
|
+
part_index = 0
|
|
531
541
|
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
542
|
+
# Submit initial batch limited by max_concurrent_parts
|
|
543
|
+
for _ in range(min(self.max_concurrent_parts, len(parts))):
|
|
544
|
+
part_info = parts[part_index]
|
|
545
|
+
future = self._submit_part(executor, f, part_info, part_size)
|
|
535
546
|
future_to_part[future] = part_info["PartNumber"]
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
547
|
+
part_index += 1
|
|
548
|
+
|
|
549
|
+
# Process completions and submit new parts (sliding window)
|
|
550
|
+
while future_to_part:
|
|
551
|
+
done_future = next(as_completed(future_to_part))
|
|
552
|
+
part_number = future_to_part.pop(done_future)
|
|
553
|
+
|
|
554
|
+
try:
|
|
555
|
+
etag = done_future.result()
|
|
556
|
+
completed_parts.append(
|
|
557
|
+
{"part_number": part_number, "etag": etag}
|
|
558
|
+
)
|
|
559
|
+
pbar.update(1)
|
|
560
|
+
except Exception as e:
|
|
561
|
+
raise Exception(f"Failed to upload part {part_number}: {e}")
|
|
562
|
+
|
|
563
|
+
# Submit next part if available
|
|
564
|
+
if part_index < len(parts):
|
|
565
|
+
part_info = parts[part_index]
|
|
566
|
+
future = self._submit_part(
|
|
567
|
+
executor, f, part_info, part_size
|
|
568
|
+
)
|
|
569
|
+
future_to_part[future] = part_info["PartNumber"]
|
|
570
|
+
part_index += 1
|
|
548
571
|
|
|
549
572
|
completed_parts.sort(key=lambda x: x["part_number"])
|
|
550
573
|
return completed_parts
|
|
@@ -3,6 +3,7 @@ from functools import cached_property
|
|
|
3
3
|
from together.resources.audio.speech import AsyncSpeech, Speech
|
|
4
4
|
from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
|
|
5
5
|
from together.resources.audio.translations import AsyncTranslations, Translations
|
|
6
|
+
from together.resources.audio.voices import AsyncVoices, Voices
|
|
6
7
|
from together.types import (
|
|
7
8
|
TogetherClient,
|
|
8
9
|
)
|
|
@@ -24,6 +25,10 @@ class Audio:
|
|
|
24
25
|
def translations(self) -> Translations:
|
|
25
26
|
return Translations(self._client)
|
|
26
27
|
|
|
28
|
+
@cached_property
|
|
29
|
+
def voices(self) -> Voices:
|
|
30
|
+
return Voices(self._client)
|
|
31
|
+
|
|
27
32
|
|
|
28
33
|
class AsyncAudio:
|
|
29
34
|
def __init__(self, client: TogetherClient) -> None:
|
|
@@ -40,3 +45,7 @@ class AsyncAudio:
|
|
|
40
45
|
@cached_property
|
|
41
46
|
def translations(self) -> AsyncTranslations:
|
|
42
47
|
return AsyncTranslations(self._client)
|
|
48
|
+
|
|
49
|
+
@cached_property
|
|
50
|
+
def voices(self) -> AsyncVoices:
|
|
51
|
+
return AsyncVoices(self._client)
|
|
@@ -30,7 +30,7 @@ class Speech:
|
|
|
30
30
|
response_format: str = "wav",
|
|
31
31
|
language: str = "en",
|
|
32
32
|
response_encoding: str = "pcm_f32le",
|
|
33
|
-
sample_rate: int =
|
|
33
|
+
sample_rate: int | None = None,
|
|
34
34
|
stream: bool = False,
|
|
35
35
|
**kwargs: Any,
|
|
36
36
|
) -> AudioSpeechStreamResponse:
|
|
@@ -49,7 +49,7 @@ class Speech:
|
|
|
49
49
|
response_encoding (str, optional): Audio encoding of response.
|
|
50
50
|
Defaults to "pcm_f32le".
|
|
51
51
|
sample_rate (int, optional): Sampling rate to use for the output audio.
|
|
52
|
-
Defaults to
|
|
52
|
+
Defaults to None. If not provided, the default sampling rate for the model will be used.
|
|
53
53
|
stream (bool, optional): If true, output is streamed for several characters at a time.
|
|
54
54
|
Defaults to False.
|
|
55
55
|
|
|
@@ -57,6 +57,12 @@ class Speech:
|
|
|
57
57
|
Union[bytes, Iterator[AudioSpeechStreamChunk]]: The generated audio as bytes or an iterator over audio stream chunks.
|
|
58
58
|
"""
|
|
59
59
|
|
|
60
|
+
if sample_rate is None:
|
|
61
|
+
if "cartesia" in model:
|
|
62
|
+
sample_rate = 44100
|
|
63
|
+
else:
|
|
64
|
+
sample_rate = 24000
|
|
65
|
+
|
|
60
66
|
requestor = api_requestor.APIRequestor(
|
|
61
67
|
client=self._client,
|
|
62
68
|
)
|
|
@@ -30,6 +30,7 @@ class Transcriptions:
|
|
|
30
30
|
timestamp_granularities: Optional[
|
|
31
31
|
Union[str, AudioTimestampGranularities]
|
|
32
32
|
] = None,
|
|
33
|
+
diarize: bool = False,
|
|
33
34
|
**kwargs: Any,
|
|
34
35
|
) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
|
|
35
36
|
"""
|
|
@@ -52,7 +53,11 @@ class Transcriptions:
|
|
|
52
53
|
timestamp_granularities: The timestamp granularities to populate for this
|
|
53
54
|
transcription. response_format must be set verbose_json to use timestamp
|
|
54
55
|
granularities. Either or both of these options are supported: word, or segment.
|
|
55
|
-
|
|
56
|
+
diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
|
|
57
|
+
In the response, in the words array, you will get the speaker id for each word.
|
|
58
|
+
In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
|
|
59
|
+
You can use the speaker_id to group the words by speaker.
|
|
60
|
+
You can use the speaker_segments to get the start and end time of each speaker segment.
|
|
56
61
|
Returns:
|
|
57
62
|
The transcribed text in the requested format.
|
|
58
63
|
"""
|
|
@@ -103,6 +108,9 @@ class Transcriptions:
|
|
|
103
108
|
else timestamp_granularities
|
|
104
109
|
)
|
|
105
110
|
|
|
111
|
+
if diarize:
|
|
112
|
+
params_data["diarize"] = diarize
|
|
113
|
+
|
|
106
114
|
# Add any additional kwargs
|
|
107
115
|
# Convert boolean values to lowercase strings for proper form encoding
|
|
108
116
|
for key, value in kwargs.items():
|
|
@@ -135,6 +143,7 @@ class Transcriptions:
|
|
|
135
143
|
if (
|
|
136
144
|
response_format == "verbose_json"
|
|
137
145
|
or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
|
|
146
|
+
or diarize
|
|
138
147
|
):
|
|
139
148
|
# Create response with model validation that preserves extra fields
|
|
140
149
|
return AudioTranscriptionVerboseResponse.model_validate(response.data)
|
|
@@ -158,6 +167,7 @@ class AsyncTranscriptions:
|
|
|
158
167
|
timestamp_granularities: Optional[
|
|
159
168
|
Union[str, AudioTimestampGranularities]
|
|
160
169
|
] = None,
|
|
170
|
+
diarize: bool = False,
|
|
161
171
|
**kwargs: Any,
|
|
162
172
|
) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
|
|
163
173
|
"""
|
|
@@ -180,7 +190,11 @@ class AsyncTranscriptions:
|
|
|
180
190
|
timestamp_granularities: The timestamp granularities to populate for this
|
|
181
191
|
transcription. response_format must be set verbose_json to use timestamp
|
|
182
192
|
granularities. Either or both of these options are supported: word, or segment.
|
|
183
|
-
|
|
193
|
+
diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
|
|
194
|
+
In the response, in the words array, you will get the speaker id for each word.
|
|
195
|
+
In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
|
|
196
|
+
You can use the speaker_id to group the words by speaker.
|
|
197
|
+
You can use the speaker_segments to get the start and end time of each speaker segment.
|
|
184
198
|
Returns:
|
|
185
199
|
The transcribed text in the requested format.
|
|
186
200
|
"""
|
|
@@ -239,6 +253,9 @@ class AsyncTranscriptions:
|
|
|
239
253
|
)
|
|
240
254
|
)
|
|
241
255
|
|
|
256
|
+
if diarize:
|
|
257
|
+
params_data["diarize"] = diarize
|
|
258
|
+
|
|
242
259
|
# Add any additional kwargs
|
|
243
260
|
# Convert boolean values to lowercase strings for proper form encoding
|
|
244
261
|
for key, value in kwargs.items():
|
|
@@ -271,6 +288,7 @@ class AsyncTranscriptions:
|
|
|
271
288
|
if (
|
|
272
289
|
response_format == "verbose_json"
|
|
273
290
|
or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
|
|
291
|
+
or diarize
|
|
274
292
|
):
|
|
275
293
|
# Create response with model validation that preserves extra fields
|
|
276
294
|
return AudioTranscriptionVerboseResponse.model_validate(response.data)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from together.abstract import api_requestor
|
|
4
|
+
from together.together_response import TogetherResponse
|
|
5
|
+
from together.types import (
|
|
6
|
+
TogetherClient,
|
|
7
|
+
TogetherRequest,
|
|
8
|
+
VoiceListResponse,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Voices:
|
|
13
|
+
def __init__(self, client: TogetherClient) -> None:
|
|
14
|
+
self._client = client
|
|
15
|
+
|
|
16
|
+
def list(self) -> VoiceListResponse:
|
|
17
|
+
"""
|
|
18
|
+
Method to return list of available voices on the API
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
VoiceListResponse: Response containing models and their available voices
|
|
22
|
+
"""
|
|
23
|
+
requestor = api_requestor.APIRequestor(
|
|
24
|
+
client=self._client,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
response, _, _ = requestor.request(
|
|
28
|
+
options=TogetherRequest(
|
|
29
|
+
method="GET",
|
|
30
|
+
url="voices",
|
|
31
|
+
),
|
|
32
|
+
stream=False,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
assert isinstance(response, TogetherResponse)
|
|
36
|
+
|
|
37
|
+
return VoiceListResponse(**response.data)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class AsyncVoices:
|
|
41
|
+
def __init__(self, client: TogetherClient) -> None:
|
|
42
|
+
self._client = client
|
|
43
|
+
|
|
44
|
+
async def list(self) -> VoiceListResponse:
|
|
45
|
+
"""
|
|
46
|
+
Async method to return list of available voices on the API
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
VoiceListResponse: Response containing models and their available voices
|
|
50
|
+
"""
|
|
51
|
+
requestor = api_requestor.APIRequestor(
|
|
52
|
+
client=self._client,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
response, _, _ = await requestor.arequest(
|
|
56
|
+
options=TogetherRequest(
|
|
57
|
+
method="GET",
|
|
58
|
+
url="voices",
|
|
59
|
+
),
|
|
60
|
+
stream=False,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
assert isinstance(response, TogetherResponse)
|
|
64
|
+
|
|
65
|
+
return VoiceListResponse(**response.data)
|