together-1.5.29-py3-none-any.whl → together-1.5.31-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
together/abstract/api_requestor.py CHANGED
@@ -619,14 +619,29 @@ class APIRequestor:
     ) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
         """Returns the response(s) and a bool indicating whether it is a stream."""
         content_type = result.headers.get("Content-Type", "")
+
         if stream and "text/event-stream" in content_type:
+            # SSE format streaming
             return (
                 self._interpret_response_line(
                     line, result.status_code, result.headers, stream=True
                 )
                 for line in parse_stream(result.iter_lines())
             ), True
+        elif stream and content_type in [
+            "audio/wav",
+            "audio/mpeg",
+            "application/octet-stream",
+        ]:
+            # Binary audio streaming - return chunks as binary data
+            def binary_stream_generator() -> Iterator[TogetherResponse]:
+                for chunk in result.iter_content(chunk_size=8192):
+                    if chunk:  # Skip empty chunks
+                        yield TogetherResponse(chunk, dict(result.headers))
+
+            return binary_stream_generator(), True
         else:
+            # Non-streaming response
             if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
                 content = result.content
             else:
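
A hedged sketch of what the new sync branch hands back. The enclosing method name (`_interpret_response`) and the `.data` payload attribute are assumed from the surrounding SDK code, not shown in this hunk:

    # Hypothetical caller: persist a streamed WAV response chunk by chunk.
    chunks, is_stream = requestor._interpret_response(result, stream=True)
    if is_stream:
        with open("speech.wav", "wb") as out:
            for resp in chunks:       # each resp wraps one <=8192-byte chunk
                out.write(resp.data)  # assumed: raw bytes exposed via .data
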
@@ -648,23 +663,49 @@ class APIRequestor:
         | tuple[TogetherResponse, bool]
     ):
         """Returns the response(s) and a bool indicating whether it is a stream."""
-        if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
+        content_type = result.headers.get("Content-Type", "")
+
+        if stream and "text/event-stream" in content_type:
+            # SSE format streaming
             return (
                 self._interpret_response_line(
                     line, result.status, result.headers, stream=True
                 )
                 async for line in parse_stream_async(result.content)
             ), True
+        elif stream and content_type in [
+            "audio/wav",
+            "audio/mpeg",
+            "application/octet-stream",
+        ]:
+            # Binary audio streaming - return chunks as binary data
+            async def binary_stream_generator() -> (
+                AsyncGenerator[TogetherResponse, None]
+            ):
+                async for chunk in result.content.iter_chunked(8192):
+                    if chunk:  # Skip empty chunks
+                        yield TogetherResponse(chunk, dict(result.headers))
+
+            return binary_stream_generator(), True
         else:
+            # Non-streaming response
             try:
-                await result.read()
+                content = await result.read()
             except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
                 raise error.Timeout("Request timed out") from e
             except aiohttp.ClientError as e:
                 utils.log_warn(e, body=result.content)
+
+            if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
+                # Binary content - keep as bytes
+                response_content: str | bytes = content
+            else:
+                # Text content - decode to string
+                response_content = content.decode("utf-8")
+
             return (
                 self._interpret_response_line(
-                    (await result.read()).decode("utf-8"),
+                    response_content,
                     result.status,
                     result.headers,
                     stream=False,
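
The async twin swaps `iter_content` for aiohttp's `iter_chunked` but yields the same shape. A minimal, hypothetical way to drain it (again assuming the wrapped bytes are exposed via `.data`):

    # Hypothetical: buffer an async binary audio stream into one bytes object.
    async def collect_audio(chunks) -> bytes:
        buf = bytearray()
        async for resp in chunks:      # AsyncGenerator[TogetherResponse, None]
            buf.extend(resp.data)      # assumed: raw bytes exposed via .data
        return bytes(buf)
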
together/cli/api/chat.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations

 import cmd
 import json
-from typing import List, Tuple
+from typing import Any, Dict, List, Tuple

 import click

@@ -181,6 +181,12 @@ def interactive(
     "--frequency-penalty", type=float, help="Frequency penalty sampling method"
 )
 @click.option("--min-p", type=float, help="Min p sampling")
+@click.option(
+    "--audio-url",
+    type=str,
+    multiple=True,
+    help="Audio URL to attach to the last user message",
+)
 @click.option("--no-stream", is_flag=True, help="Disable streaming")
 @click.option("--logprobs", type=int, help="Return logprobs. Only works with --raw.")
 @click.option("--echo", is_flag=True, help="Echo prompt. Only works with --raw.")
@@ -200,6 +206,7 @@ def chat(
     presence_penalty: float | None = None,
     frequency_penalty: float | None = None,
     min_p: float | None = None,
+    audio_url: List[str] | None = None,
     no_stream: bool = False,
     logprobs: int | None = None,
     echo: bool | None = None,
@@ -210,7 +217,22 @@ def chat(
     """Generate chat completions from messages"""
     client: Together = ctx.obj

-    messages = [{"role": msg[0], "content": msg[1]} for msg in message]
+    messages: List[Dict[str, Any]] = [
+        {"role": msg[0], "content": msg[1]} for msg in message
+    ]
+
+    if audio_url and messages:
+        last_msg = messages[-1]
+        if last_msg["role"] == "user":
+            # Convert content to list if it is string
+            if isinstance(last_msg["content"], str):
+                last_msg["content"] = [{"type": "text", "text": last_msg["content"]}]
+
+            # Append audio URLs
+            for url in audio_url:
+                last_msg["content"].append(
+                    {"type": "audio_url", "audio_url": {"url": url}}
+                )

     response = client.chat.completions.create(
         model=model,
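
For reference, with one trailing user message and one `--audio-url`, the block above rewrites that message into the parts format (text and URL illustrative):

    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is said in this clip?"},
            {"type": "audio_url", "audio_url": {"url": "https://example.com/clip.wav"}},
        ],
    }
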
together/cli/api/endpoints.py CHANGED
@@ -133,8 +133,11 @@ def endpoints(ctx: click.Context) -> None:
     help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
 )
 @click.option(
-    "--wait",
-    is_flag=True,
+    "--availability-zone",
+    help="Start endpoint in specified availability zone (e.g., us-central-4b)",
+)
+@click.option(
+    "--wait/--no-wait",
     default=True,
     help="Wait for the endpoint to be ready after creation",
 )
@@ -152,6 +155,7 @@ def create(
     no_speculative_decoding: bool,
     no_auto_start: bool,
     inactive_timeout: int | None,
+    availability_zone: str | None,
     wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
@@ -177,6 +181,7 @@ def create(
             disable_speculative_decoding=no_speculative_decoding,
             state="STOPPED" if no_auto_start else "STARTED",
             inactive_timeout=inactive_timeout,
+            availability_zone=availability_zone,
         )
     except InvalidRequestError as e:
         print_api_error(e)
@@ -203,6 +208,8 @@ def create(
         click.echo("  Auto-start: disabled", err=True)
     if inactive_timeout is not None:
         click.echo(f"  Inactive timeout: {inactive_timeout} minutes", err=True)
+    if availability_zone:
+        click.echo(f"  Availability zone: {availability_zone}", err=True)

     click.echo(f"Endpoint created successfully, id: {response.id}", err=True)

@@ -276,7 +283,9 @@ def fetch_and_print_hardware_options(
 @endpoints.command()
 @click.argument("endpoint-id", required=True)
 @click.option(
-    "--wait", is_flag=True, default=True, help="Wait for the endpoint to stop"
+    "--wait/--no-wait",
+    default=True,
+    help="Wait for the endpoint to stop",
 )
 @click.pass_obj
 @handle_api_errors
@@ -299,7 +308,9 @@ def stop(client: Together, endpoint_id: str, wait: bool) -> None:
 @endpoints.command()
 @click.argument("endpoint-id", required=True)
 @click.option(
-    "--wait", is_flag=True, default=True, help="Wait for the endpoint to start"
+    "--wait/--no-wait",
+    default=True,
+    help="Wait for the endpoint to start",
 )
 @click.pass_obj
 @handle_api_errors
@@ -337,13 +348,30 @@ def delete(client: Together, endpoint_id: str) -> None:
     type=click.Choice(["dedicated", "serverless"]),
     help="Filter by endpoint type",
 )
+@click.option(
+    "--mine",
+    type=click.BOOL,
+    default=None,
+    help="true (only mine), default=all",
+)
+@click.option(
+    "--usage-type",
+    type=click.Choice(["on-demand", "reserved"]),
+    help="Filter by endpoint usage type",
+)
 @click.pass_obj
 @handle_api_errors
 def list(
-    client: Together, json: bool, type: Literal["dedicated", "serverless"] | None
+    client: Together,
+    json: bool,
+    type: Literal["dedicated", "serverless"] | None,
+    usage_type: Literal["on-demand", "reserved"] | None,
+    mine: bool | None,
 ) -> None:
     """List all inference endpoints (includes both dedicated and serverless endpoints)."""
-    endpoints: List[ListEndpoint] = client.endpoints.list(type=type)
+    endpoints: List[ListEndpoint] = client.endpoints.list(
+        type=type, usage_type=usage_type, mine=mine
+    )

     if not endpoints:
         click.echo("No dedicated endpoints found", err=True)
@@ -432,3 +460,25 @@ def update(

     click.echo("Successfully updated endpoint", err=True)
     click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.pass_obj
+@handle_api_errors
+def availability_zones(client: Together, json: bool) -> None:
+    """List all availability zones."""
+    avzones = client.endpoints.list_avzones()
+
+    if not avzones:
+        click.echo("No availability zones found", err=True)
+        return
+
+    if json:
+        import json as json_lib
+
+        click.echo(json_lib.dumps({"avzones": avzones}, indent=2))
+    else:
+        click.echo("Available zones:", err=True)
+        for availability_zone in sorted(avzones):
+            click.echo(f"  {availability_zone}")
together/constants.py CHANGED
@@ -20,13 +20,13 @@ MAX_CONCURRENT_PARTS = 4  # Maximum concurrent parts for multipart upload

 # Multipart upload constants
 MIN_PART_SIZE_MB = 5  # Minimum part size (S3 requirement)
-TARGET_PART_SIZE_MB = 100  # Target part size for optimal performance
-MAX_MULTIPART_PARTS = 250  # Maximum parts per upload (S3 limit)
+TARGET_PART_SIZE_MB = 250  # Target part size
+MAX_MULTIPART_PARTS = 250  # Maximum parts per upload
 MULTIPART_UPLOAD_TIMEOUT = 300  # Timeout in seconds for uploading each part
 MULTIPART_THRESHOLD_GB = 5.0  # threshold for switching to multipart upload

 # maximum number of GB sized files we support finetuning for
-MAX_FILE_SIZE_GB = 25.0
+MAX_FILE_SIZE_GB = 50.1


 # Messages
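
The raised limits stay mutually consistent: 250 parts of 250 MB cover roughly 61 GB, comfortably above the new 50.1 GB file cap. A quick sanity check:

    # Values from together/constants.py after this change.
    TARGET_PART_SIZE_MB = 250
    MAX_MULTIPART_PARTS = 250
    MAX_FILE_SIZE_GB = 50.1

    max_upload_gb = TARGET_PART_SIZE_MB * MAX_MULTIPART_PARTS / 1024  # ~61.0 GB
    assert max_upload_gb > MAX_FILE_SIZE_GB
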
together/filemanager.py CHANGED
@@ -6,10 +6,10 @@ import shutil
 import stat
 import tempfile
 import uuid
-from concurrent.futures import ThreadPoolExecutor, as_completed
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
 from functools import partial
 from pathlib import Path
-from typing import Any, Dict, List, Tuple
+from typing import Any, BinaryIO, Dict, List, Tuple

 import requests
 from filelock import FileLock
@@ -212,6 +212,7 @@ class DownloadManager:
             ),
             remaining_retries=MAX_RETRIES,
             stream=True,
+            request_timeout=3600,
         )

         try:
@@ -512,6 +513,18 @@ class MultipartUploadManager:

         return response.data

+    def _submit_part(
+        self,
+        executor: ThreadPoolExecutor,
+        f: BinaryIO,
+        part_info: Dict[str, Any],
+        part_size: int,
+    ) -> Future[str]:
+        """Submit a single part for upload and return the future"""
+        f.seek((part_info["PartNumber"] - 1) * part_size)
+        part_data = f.read(part_size)
+        return executor.submit(self._upload_single_part, part_info, part_data)
+
     def _upload_parts_concurrent(
         self, file: Path, upload_info: Dict[str, Any], part_size: int
     ) -> List[Dict[str, Any]]:
@@ -522,29 +535,39 @@

         with ThreadPoolExecutor(max_workers=self.max_concurrent_parts) as executor:
             with tqdm(total=len(parts), desc="Uploading parts", unit="part") as pbar:
-                future_to_part = {}
-
                 with open(file, "rb") as f:
-                    for part_info in parts:
-                        f.seek((part_info["PartNumber"] - 1) * part_size)
-                        part_data = f.read(part_size)
+                    future_to_part = {}
+                    part_index = 0

-                        future = executor.submit(
-                            self._upload_single_part, part_info, part_data
-                        )
+                    # Submit initial batch limited by max_concurrent_parts
+                    for _ in range(min(self.max_concurrent_parts, len(parts))):
+                        part_info = parts[part_index]
+                        future = self._submit_part(executor, f, part_info, part_size)
                         future_to_part[future] = part_info["PartNumber"]
-
-                # Collect results
-                for future in as_completed(future_to_part):
-                    part_number = future_to_part[future]
-                    try:
-                        etag = future.result()
-                        completed_parts.append(
-                            {"part_number": part_number, "etag": etag}
-                        )
-                        pbar.update(1)
-                    except Exception as e:
-                        raise Exception(f"Failed to upload part {part_number}: {e}")
+                        part_index += 1
+
+                    # Process completions and submit new parts (sliding window)
+                    while future_to_part:
+                        done_future = next(as_completed(future_to_part))
+                        part_number = future_to_part.pop(done_future)
+
+                        try:
+                            etag = done_future.result()
+                            completed_parts.append(
+                                {"part_number": part_number, "etag": etag}
+                            )
+                            pbar.update(1)
+                        except Exception as e:
+                            raise Exception(f"Failed to upload part {part_number}: {e}")
+
+                        # Submit next part if available
+                        if part_index < len(parts):
+                            part_info = parts[part_index]
+                            future = self._submit_part(
+                                executor, f, part_info, part_size
+                            )
+                            future_to_part[future] = part_info["PartNumber"]
+                            part_index += 1

         completed_parts.sort(key=lambda x: x["part_number"])
         return completed_parts
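
The rewrite replaces "read and submit every part up front" with a sliding window, so only about `max_concurrent_parts` part buffers exist in memory at once instead of the whole file. The pattern in isolation (a self-contained sketch, not the SDK code):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def run_windowed(items, worker, window=4):
        """Run worker(item) keeping at most `window` tasks in flight."""
        results, idx, in_flight = [], 0, {}
        with ThreadPoolExecutor(max_workers=window) as pool:
            while idx < min(window, len(items)):      # prime the window
                in_flight[pool.submit(worker, items[idx])] = idx
                idx += 1
            while in_flight:
                done = next(as_completed(in_flight))  # block on the next completion
                results.append((in_flight.pop(done), done.result()))
                if idx < len(items):                  # refill one slot
                    in_flight[pool.submit(worker, items[idx])] = idx
                    idx += 1
        return [value for _, value in sorted(results)]

    print(run_windowed(list(range(10)), lambda n: n * n))
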
together/resources/audio/__init__.py CHANGED
@@ -3,6 +3,7 @@ from functools import cached_property
 from together.resources.audio.speech import AsyncSpeech, Speech
 from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
 from together.resources.audio.translations import AsyncTranslations, Translations
+from together.resources.audio.voices import AsyncVoices, Voices
 from together.types import (
     TogetherClient,
 )
@@ -24,6 +25,10 @@ class Audio:
     def translations(self) -> Translations:
         return Translations(self._client)

+    @cached_property
+    def voices(self) -> Voices:
+        return Voices(self._client)
+

 class AsyncAudio:
     def __init__(self, client: TogetherClient) -> None:
@@ -40,3 +45,7 @@ class AsyncAudio:
     @cached_property
     def translations(self) -> AsyncTranslations:
         return AsyncTranslations(self._client)
+
+    @cached_property
+    def voices(self) -> AsyncVoices:
+        return AsyncVoices(self._client)
together/resources/audio/speech.py CHANGED
@@ -30,7 +30,7 @@ class Speech:
         response_format: str = "wav",
         language: str = "en",
         response_encoding: str = "pcm_f32le",
-        sample_rate: int = 44100,
+        sample_rate: int | None = None,
         stream: bool = False,
         **kwargs: Any,
     ) -> AudioSpeechStreamResponse:
@@ -49,7 +49,7 @@ class Speech:
             response_encoding (str, optional): Audio encoding of response.
                 Defaults to "pcm_f32le".
             sample_rate (int, optional): Sampling rate to use for the output audio.
-                Defaults to 44100.
+                Defaults to None. If not provided, the default sampling rate for the model will be used.
             stream (bool, optional): If true, output is streamed for several characters at a time.
                 Defaults to False.

@@ -57,6 +57,12 @@ class Speech:
             Union[bytes, Iterator[AudioSpeechStreamChunk]]: The generated audio as bytes or an iterator over audio stream chunks.
         """

+        if sample_rate is None:
+            if "cartesia" in model:
+                sample_rate = 44100
+            else:
+                sample_rate = 24000
+
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
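
Callers that never set `sample_rate` now get a model-appropriate default (44.1 kHz for Cartesia models, 24 kHz otherwise) instead of a hard-coded 44100. A hedged usage sketch; the model id and the `input` parameter name are assumed from the SDK's public interface, not this hunk:

    from together import Together

    client = Together()
    # sample_rate omitted: a non-Cartesia model now falls back to 24000 Hz.
    speech = client.audio.speech.create(
        model="hexgrad/Kokoro-82M",            # illustrative model id
        input="Defaults are model-aware now.",
    )
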
together/resources/audio/transcriptions.py CHANGED
@@ -30,6 +30,7 @@ class Transcriptions:
         timestamp_granularities: Optional[
             Union[str, AudioTimestampGranularities]
         ] = None,
+        diarize: bool = False,
         **kwargs: Any,
     ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
         """
@@ -52,7 +53,11 @@ class Transcriptions:
             timestamp_granularities: The timestamp granularities to populate for this
                 transcription. response_format must be set verbose_json to use timestamp
                 granularities. Either or both of these options are supported: word, or segment.
-
+            diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
+                In the response, in the words array, you will get the speaker id for each word.
+                In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
+                You can use the speaker_id to group the words by speaker.
+                You can use the speaker_segments to get the start and end time of each speaker segment.
         Returns:
             The transcribed text in the requested format.
         """
@@ -103,6 +108,9 @@ class Transcriptions:
             else timestamp_granularities
         )

+        if diarize:
+            params_data["diarize"] = diarize
+
         # Add any additional kwargs
         # Convert boolean values to lowercase strings for proper form encoding
         for key, value in kwargs.items():
@@ -135,6 +143,7 @@ class Transcriptions:
         if (
             response_format == "verbose_json"
             or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+            or diarize
        ):
             # Create response with model validation that preserves extra fields
             return AudioTranscriptionVerboseResponse.model_validate(response.data)
@@ -158,6 +167,7 @@ class AsyncTranscriptions:
         timestamp_granularities: Optional[
             Union[str, AudioTimestampGranularities]
         ] = None,
+        diarize: bool = False,
         **kwargs: Any,
     ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
         """
@@ -180,7 +190,11 @@ class AsyncTranscriptions:
             timestamp_granularities: The timestamp granularities to populate for this
                 transcription. response_format must be set verbose_json to use timestamp
                 granularities. Either or both of these options are supported: word, or segment.
-
+            diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
+                In the response, in the words array, you will get the speaker id for each word.
+                In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
+                You can use the speaker_id to group the words by speaker.
+                You can use the speaker_segments to get the start and end time of each speaker segment.
         Returns:
             The transcribed text in the requested format.
         """
@@ -239,6 +253,9 @@ class AsyncTranscriptions:
             )
         )

+        if diarize:
+            params_data["diarize"] = diarize
+
         # Add any additional kwargs
         # Convert boolean values to lowercase strings for proper form encoding
         for key, value in kwargs.items():
@@ -271,6 +288,7 @@ class AsyncTranscriptions:
         if (
             response_format == "verbose_json"
             or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+            or diarize
         ):
             # Create response with model validation that preserves extra fields
             return AudioTranscriptionVerboseResponse.model_validate(response.data)
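
Per the docstring, each entry in `words` carries a speaker id and `speaker_segments` groups words with start/end times. A hedged sketch of grouping by speaker; the field names come from the docstring, but the exact response structure, `file` parameter, and model id are assumptions:

    from collections import defaultdict
    from together import Together

    client = Together()
    resp = client.audio.transcriptions.create(
        file="meeting.wav",                # illustrative input file
        model="openai/whisper-large-v3",   # illustrative model id
        diarize=True,
    )

    lines: dict[str, list[str]] = defaultdict(list)
    for word in resp.words:                # speaker id per word, per the docstring
        lines[word["speaker_id"]].append(word["word"])
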
together/resources/audio/voices.py ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    TogetherClient,
+    TogetherRequest,
+    VoiceListResponse,
+)
+
+
+class Voices:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def list(self) -> VoiceListResponse:
+        """
+        Method to return list of available voices on the API
+
+        Returns:
+            VoiceListResponse: Response containing models and their available voices
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="voices",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return VoiceListResponse(**response.data)
+
+
+class AsyncVoices:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def list(self) -> VoiceListResponse:
+        """
+        Async method to return list of available voices on the API
+
+        Returns:
+            VoiceListResponse: Response containing models and their available voices
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="voices",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return VoiceListResponse(**response.data)
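
Wired together with the new `Audio.voices` property above, listing voices is one call; a quick sketch:

    from together import Together

    # Per this diff: GET /voices, parsed into a VoiceListResponse.
    voices = Together().audio.voices.list()
    print(voices)
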