together-1.5.29-py3-none-any.whl → together-1.5.31-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
together/abstract/api_requestor.py CHANGED
@@ -619,14 +619,29 @@ class APIRequestor:
     ) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
         """Returns the response(s) and a bool indicating whether it is a stream."""
         content_type = result.headers.get("Content-Type", "")
+
         if stream and "text/event-stream" in content_type:
+            # SSE format streaming
             return (
                 self._interpret_response_line(
                     line, result.status_code, result.headers, stream=True
                 )
                 for line in parse_stream(result.iter_lines())
             ), True
+        elif stream and content_type in [
+            "audio/wav",
+            "audio/mpeg",
+            "application/octet-stream",
+        ]:
+            # Binary audio streaming - return chunks as binary data
+            def binary_stream_generator() -> Iterator[TogetherResponse]:
+                for chunk in result.iter_content(chunk_size=8192):
+                    if chunk:  # Skip empty chunks
+                        yield TogetherResponse(chunk, dict(result.headers))
+
+            return binary_stream_generator(), True
         else:
+            # Non-streaming response
             if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
                 content = result.content
             else:
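
A hedged sketch of what the new sync branch hands back. The enclosing method name (`_interpret_response`) and the `.data` payload attribute are assumed from the surrounding SDK code, not shown in this hunk:

    # Hypothetical caller: persist a streamed WAV response chunk by chunk.
    chunks, is_stream = requestor._interpret_response(result, stream=True)
    if is_stream:
        with open("speech.wav", "wb") as out:
            for resp in chunks:       # each resp wraps one <=8192-byte chunk
                out.write(resp.data)  # assumed: raw bytes exposed via .data
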
@@ -648,23 +663,49 @@ class APIRequestor:
         | tuple[TogetherResponse, bool]
     ):
         """Returns the response(s) and a bool indicating whether it is a stream."""
-        if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
+        content_type = result.headers.get("Content-Type", "")
+
+        if stream and "text/event-stream" in content_type:
+            # SSE format streaming
             return (
                 self._interpret_response_line(
                     line, result.status, result.headers, stream=True
                 )
                 async for line in parse_stream_async(result.content)
             ), True
+        elif stream and content_type in [
+            "audio/wav",
+            "audio/mpeg",
+            "application/octet-stream",
+        ]:
+            # Binary audio streaming - return chunks as binary data
+            async def binary_stream_generator() -> (
+                AsyncGenerator[TogetherResponse, None]
+            ):
+                async for chunk in result.content.iter_chunked(8192):
+                    if chunk:  # Skip empty chunks
+                        yield TogetherResponse(chunk, dict(result.headers))
+
+            return binary_stream_generator(), True
         else:
+            # Non-streaming response
             try:
-                await result.read()
+                content = await result.read()
             except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
                 raise error.Timeout("Request timed out") from e
             except aiohttp.ClientError as e:
                 utils.log_warn(e, body=result.content)
+
+            if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
+                # Binary content - keep as bytes
+                response_content: str | bytes = content
+            else:
+                # Text content - decode to string
+                response_content = content.decode("utf-8")
+
             return (
                 self._interpret_response_line(
-                    (await result.read()).decode("utf-8"),
+                    response_content,
                     result.status,
                     result.headers,
                     stream=False,
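
The async twin swaps `iter_content` for aiohttp's `iter_chunked` but yields the same shape. A minimal, hypothetical way to drain it (again assuming the wrapped bytes are exposed via `.data`):

    # Hypothetical: buffer an async binary audio stream into one bytes object.
    async def collect_audio(chunks) -> bytes:
        buf = bytearray()
        async for resp in chunks:      # AsyncGenerator[TogetherResponse, None]
            buf.extend(resp.data)      # assumed: raw bytes exposed via .data
        return bytes(buf)
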
together/cli/api/chat.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations

 import cmd
 import json
-from typing import List, Tuple
+from typing import Any, Dict, List, Tuple

 import click

@@ -181,6 +181,12 @@ def interactive(
     "--frequency-penalty", type=float, help="Frequency penalty sampling method"
 )
 @click.option("--min-p", type=float, help="Min p sampling")
+@click.option(
+    "--audio-url",
+    type=str,
+    multiple=True,
+    help="Audio URL to attach to the last user message",
+)
 @click.option("--no-stream", is_flag=True, help="Disable streaming")
 @click.option("--logprobs", type=int, help="Return logprobs. Only works with --raw.")
 @click.option("--echo", is_flag=True, help="Echo prompt. Only works with --raw.")
@@ -200,6 +206,7 @@ def chat(
     presence_penalty: float | None = None,
     frequency_penalty: float | None = None,
     min_p: float | None = None,
+    audio_url: List[str] | None = None,
     no_stream: bool = False,
     logprobs: int | None = None,
     echo: bool | None = None,
@@ -210,7 +217,22 @@ def chat(
     """Generate chat completions from messages"""
     client: Together = ctx.obj

-    messages = [{"role": msg[0], "content": msg[1]} for msg in message]
+    messages: List[Dict[str, Any]] = [
+        {"role": msg[0], "content": msg[1]} for msg in message
+    ]
+
+    if audio_url and messages:
+        last_msg = messages[-1]
+        if last_msg["role"] == "user":
+            # Convert content to list if it is string
+            if isinstance(last_msg["content"], str):
+                last_msg["content"] = [{"type": "text", "text": last_msg["content"]}]
+
+            # Append audio URLs
+            for url in audio_url:
+                last_msg["content"].append(
+                    {"type": "audio_url", "audio_url": {"url": url}}
+                )

     response = client.chat.completions.create(
         model=model,
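
For reference, with one trailing user message and one `--audio-url`, the block above rewrites that message into the parts format (text and URL illustrative):

    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is said in this clip?"},
            {"type": "audio_url", "audio_url": {"url": "https://example.com/clip.wav"}},
        ],
    }
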
together/cli/api/endpoints.py CHANGED
@@ -133,8 +133,11 @@ def endpoints(ctx: click.Context) -> None:
     help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
 )
 @click.option(
-    "--wait",
-    is_flag=True,
+    "--availability-zone",
+    help="Start endpoint in specified availability zone (e.g., us-central-4b)",
+)
+@click.option(
+    "--wait/--no-wait",
     default=True,
     help="Wait for the endpoint to be ready after creation",
 )
@@ -152,6 +155,7 @@ def create(
     no_speculative_decoding: bool,
     no_auto_start: bool,
     inactive_timeout: int | None,
+    availability_zone: str | None,
     wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
@@ -177,6 +181,7 @@ def create(
             disable_speculative_decoding=no_speculative_decoding,
             state="STOPPED" if no_auto_start else "STARTED",
             inactive_timeout=inactive_timeout,
+            availability_zone=availability_zone,
         )
     except InvalidRequestError as e:
         print_api_error(e)
@@ -203,6 +208,8 @@ def create(
         click.echo("  Auto-start: disabled", err=True)
     if inactive_timeout is not None:
         click.echo(f"  Inactive timeout: {inactive_timeout} minutes", err=True)
+    if availability_zone:
+        click.echo(f"  Availability zone: {availability_zone}", err=True)

     click.echo(f"Endpoint created successfully, id: {response.id}", err=True)

@@ -276,7 +283,9 @@ def fetch_and_print_hardware_options(
 @endpoints.command()
 @click.argument("endpoint-id", required=True)
 @click.option(
-    "--wait", is_flag=True, default=True, help="Wait for the endpoint to stop"
+    "--wait/--no-wait",
+    default=True,
+    help="Wait for the endpoint to stop",
 )
 @click.pass_obj
 @handle_api_errors
@@ -299,7 +308,9 @@ def stop(client: Together, endpoint_id: str, wait: bool) -> None:
 @endpoints.command()
 @click.argument("endpoint-id", required=True)
 @click.option(
-    "--wait", is_flag=True, default=True, help="Wait for the endpoint to start"
+    "--wait/--no-wait",
+    default=True,
+    help="Wait for the endpoint to start",
 )
 @click.pass_obj
 @handle_api_errors
@@ -337,13 +348,30 @@ def delete(client: Together, endpoint_id: str) -> None:
     type=click.Choice(["dedicated", "serverless"]),
     help="Filter by endpoint type",
 )
+@click.option(
+    "--mine",
+    type=click.BOOL,
+    default=None,
+    help="true (only mine), default=all",
+)
+@click.option(
+    "--usage-type",
+    type=click.Choice(["on-demand", "reserved"]),
+    help="Filter by endpoint usage type",
+)
 @click.pass_obj
 @handle_api_errors
 def list(
-    client: Together, json: bool, type: Literal["dedicated", "serverless"] | None
+    client: Together,
+    json: bool,
+    type: Literal["dedicated", "serverless"] | None,
+    usage_type: Literal["on-demand", "reserved"] | None,
+    mine: bool | None,
 ) -> None:
     """List all inference endpoints (includes both dedicated and serverless endpoints)."""
-    endpoints: List[ListEndpoint] = client.endpoints.list(type=type)
+    endpoints: List[ListEndpoint] = client.endpoints.list(
+        type=type, usage_type=usage_type, mine=mine
+    )

     if not endpoints:
         click.echo("No dedicated endpoints found", err=True)
@@ -432,3 +460,25 @@ def update(

     click.echo("Successfully updated endpoint", err=True)
     click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.pass_obj
+@handle_api_errors
+def availability_zones(client: Together, json: bool) -> None:
+    """List all availability zones."""
+    avzones = client.endpoints.list_avzones()
+
+    if not avzones:
+        click.echo("No availability zones found", err=True)
+        return
+
+    if json:
+        import json as json_lib
+
+        click.echo(json_lib.dumps({"avzones": avzones}, indent=2))
+    else:
+        click.echo("Available zones:", err=True)
+        for availability_zone in sorted(avzones):
+            click.echo(f"  {availability_zone}")
together/constants.py CHANGED
@@ -20,13 +20,13 @@ MAX_CONCURRENT_PARTS = 4  # Maximum concurrent parts for multipart upload

 # Multipart upload constants
 MIN_PART_SIZE_MB = 5  # Minimum part size (S3 requirement)
-TARGET_PART_SIZE_MB = 100  # Target part size for optimal performance
-MAX_MULTIPART_PARTS = 250  # Maximum parts per upload (S3 limit)
+TARGET_PART_SIZE_MB = 250  # Target part size
+MAX_MULTIPART_PARTS = 250  # Maximum parts per upload
 MULTIPART_UPLOAD_TIMEOUT = 300  # Timeout in seconds for uploading each part
 MULTIPART_THRESHOLD_GB = 5.0  # threshold for switching to multipart upload

 # maximum number of GB sized files we support finetuning for
-MAX_FILE_SIZE_GB = 25.0
+MAX_FILE_SIZE_GB = 50.1


 # Messages
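
The raised limits stay mutually consistent: 250 parts of 250 MB cover roughly 61 GB, comfortably above the new 50.1 GB file cap. A quick sanity check:

    # Values from together/constants.py after this change.
    TARGET_PART_SIZE_MB = 250
    MAX_MULTIPART_PARTS = 250
    MAX_FILE_SIZE_GB = 50.1

    max_upload_gb = TARGET_PART_SIZE_MB * MAX_MULTIPART_PARTS / 1024  # ~61.0 GB
    assert max_upload_gb > MAX_FILE_SIZE_GB
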
together/filemanager.py CHANGED
@@ -6,10 +6,10 @@ import shutil
 import stat
 import tempfile
 import uuid
-from concurrent.futures import ThreadPoolExecutor, as_completed
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
 from functools import partial
 from pathlib import Path
-from typing import Any, Dict, List, Tuple
+from typing import Any, BinaryIO, Dict, List, Tuple

 import requests
 from filelock import FileLock
@@ -212,6 +212,7 @@ class DownloadManager:
             ),
             remaining_retries=MAX_RETRIES,
             stream=True,
+            request_timeout=3600,
         )

         try:
@@ -512,6 +513,18 @@ class MultipartUploadManager:

         return response.data

+    def _submit_part(
+        self,
+        executor: ThreadPoolExecutor,
+        f: BinaryIO,
+        part_info: Dict[str, Any],
+        part_size: int,
+    ) -> Future[str]:
+        """Submit a single part for upload and return the future"""
+        f.seek((part_info["PartNumber"] - 1) * part_size)
+        part_data = f.read(part_size)
+        return executor.submit(self._upload_single_part, part_info, part_data)
+
     def _upload_parts_concurrent(
         self, file: Path, upload_info: Dict[str, Any], part_size: int
     ) -> List[Dict[str, Any]]:
@@ -522,29 +535,39 @@

         with ThreadPoolExecutor(max_workers=self.max_concurrent_parts) as executor:
             with tqdm(total=len(parts), desc="Uploading parts", unit="part") as pbar:
-                future_to_part = {}
-
                 with open(file, "rb") as f:
-                    for part_info in parts:
-                        f.seek((part_info["PartNumber"] - 1) * part_size)
-                        part_data = f.read(part_size)
+                    future_to_part = {}
+                    part_index = 0

-                        future = executor.submit(
-                            self._upload_single_part, part_info, part_data
-                        )
+                    # Submit initial batch limited by max_concurrent_parts
+                    for _ in range(min(self.max_concurrent_parts, len(parts))):
+                        part_info = parts[part_index]
+                        future = self._submit_part(executor, f, part_info, part_size)
                         future_to_part[future] = part_info["PartNumber"]
-
-                # Collect results
-                for future in as_completed(future_to_part):
-                    part_number = future_to_part[future]
-                    try:
-                        etag = future.result()
-                        completed_parts.append(
-                            {"part_number": part_number, "etag": etag}
-                        )
-                        pbar.update(1)
-                    except Exception as e:
-                        raise Exception(f"Failed to upload part {part_number}: {e}")
+                        part_index += 1
+
+                    # Process completions and submit new parts (sliding window)
+                    while future_to_part:
+                        done_future = next(as_completed(future_to_part))
+                        part_number = future_to_part.pop(done_future)
+
+                        try:
+                            etag = done_future.result()
+                            completed_parts.append(
+                                {"part_number": part_number, "etag": etag}
+                            )
+                            pbar.update(1)
+                        except Exception as e:
+                            raise Exception(f"Failed to upload part {part_number}: {e}")
+
+                        # Submit next part if available
+                        if part_index < len(parts):
+                            part_info = parts[part_index]
+                            future = self._submit_part(
+                                executor, f, part_info, part_size
+                            )
+                            future_to_part[future] = part_info["PartNumber"]
+                            part_index += 1

         completed_parts.sort(key=lambda x: x["part_number"])
         return completed_parts
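
The rewrite replaces "read and submit every part up front" with a sliding window, so only about `max_concurrent_parts` part buffers exist in memory at once instead of the whole file. The pattern in isolation (a self-contained sketch, not the SDK code):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def run_windowed(items, worker, window=4):
        """Run worker(item) keeping at most `window` tasks in flight."""
        results, idx, in_flight = [], 0, {}
        with ThreadPoolExecutor(max_workers=window) as pool:
            while idx < min(window, len(items)):      # prime the window
                in_flight[pool.submit(worker, items[idx])] = idx
                idx += 1
            while in_flight:
                done = next(as_completed(in_flight))  # block on the next completion
                results.append((in_flight.pop(done), done.result()))
                if idx < len(items):                  # refill one slot
                    in_flight[pool.submit(worker, items[idx])] = idx
                    idx += 1
        return [value for _, value in sorted(results)]

    print(run_windowed(list(range(10)), lambda n: n * n))
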
together/resources/audio/__init__.py CHANGED
@@ -3,6 +3,7 @@ from functools import cached_property
 from together.resources.audio.speech import AsyncSpeech, Speech
 from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
 from together.resources.audio.translations import AsyncTranslations, Translations
+from together.resources.audio.voices import AsyncVoices, Voices
 from together.types import (
     TogetherClient,
 )
@@ -24,6 +25,10 @@ class Audio:
     def translations(self) -> Translations:
         return Translations(self._client)

+    @cached_property
+    def voices(self) -> Voices:
+        return Voices(self._client)
+

 class AsyncAudio:
     def __init__(self, client: TogetherClient) -> None:
@@ -40,3 +45,7 @@ class AsyncAudio:
     @cached_property
     def translations(self) -> AsyncTranslations:
         return AsyncTranslations(self._client)
+
+    @cached_property
+    def voices(self) -> AsyncVoices:
+        return AsyncVoices(self._client)
together/resources/audio/speech.py CHANGED
@@ -30,7 +30,7 @@ class Speech:
         response_format: str = "wav",
         language: str = "en",
         response_encoding: str = "pcm_f32le",
-        sample_rate: int = 44100,
+        sample_rate: int | None = None,
         stream: bool = False,
         **kwargs: Any,
     ) -> AudioSpeechStreamResponse:
@@ -49,7 +49,7 @@ class Speech:
             response_encoding (str, optional): Audio encoding of response.
                 Defaults to "pcm_f32le".
             sample_rate (int, optional): Sampling rate to use for the output audio.
-                Defaults to 44100.
+                Defaults to None. If not provided, the default sampling rate for the model will be used.
             stream (bool, optional): If true, output is streamed for several characters at a time.
                 Defaults to False.

@@ -57,6 +57,12 @@ class Speech:
             Union[bytes, Iterator[AudioSpeechStreamChunk]]: The generated audio as bytes or an iterator over audio stream chunks.
         """

+        if sample_rate is None:
+            if "cartesia" in model:
+                sample_rate = 44100
+            else:
+                sample_rate = 24000
+
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
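
Callers that never set `sample_rate` now get a model-appropriate default (44.1 kHz for Cartesia models, 24 kHz otherwise) instead of a hard-coded 44100. A hedged usage sketch; the model id and the `input` parameter name are assumed from the SDK's public interface, not this hunk:

    from together import Together

    client = Together()
    # sample_rate omitted: a non-Cartesia model now falls back to 24000 Hz.
    speech = client.audio.speech.create(
        model="hexgrad/Kokoro-82M",            # illustrative model id
        input="Defaults are model-aware now.",
    )
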
together/resources/audio/transcriptions.py CHANGED
@@ -30,6 +30,7 @@ class Transcriptions:
         timestamp_granularities: Optional[
             Union[str, AudioTimestampGranularities]
         ] = None,
+        diarize: bool = False,
         **kwargs: Any,
     ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
         """
@@ -52,7 +53,11 @@ class Transcriptions:
             timestamp_granularities: The timestamp granularities to populate for this
                 transcription. response_format must be set verbose_json to use timestamp
                 granularities. Either or both of these options are supported: word, or segment.
-
+            diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
+                In the response, in the words array, you will get the speaker id for each word.
+                In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
+                You can use the speaker_id to group the words by speaker.
+                You can use the speaker_segments to get the start and end time of each speaker segment.
         Returns:
             The transcribed text in the requested format.
         """
@@ -103,6 +108,9 @@ class Transcriptions:
             else timestamp_granularities
         )

+        if diarize:
+            params_data["diarize"] = diarize
+
         # Add any additional kwargs
         # Convert boolean values to lowercase strings for proper form encoding
         for key, value in kwargs.items():
@@ -135,6 +143,7 @@ class Transcriptions:
         if (
             response_format == "verbose_json"
             or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+            or diarize
        ):
             # Create response with model validation that preserves extra fields
             return AudioTranscriptionVerboseResponse.model_validate(response.data)
@@ -158,6 +167,7 @@ class AsyncTranscriptions:
         timestamp_granularities: Optional[
             Union[str, AudioTimestampGranularities]
         ] = None,
+        diarize: bool = False,
         **kwargs: Any,
     ) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
         """
@@ -180,7 +190,11 @@ class AsyncTranscriptions:
             timestamp_granularities: The timestamp granularities to populate for this
                 transcription. response_format must be set verbose_json to use timestamp
                 granularities. Either or both of these options are supported: word, or segment.
-
+            diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
+                In the response, in the words array, you will get the speaker id for each word.
+                In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
+                You can use the speaker_id to group the words by speaker.
+                You can use the speaker_segments to get the start and end time of each speaker segment.
         Returns:
             The transcribed text in the requested format.
         """
@@ -239,6 +253,9 @@ class AsyncTranscriptions:
             )
         )

+        if diarize:
+            params_data["diarize"] = diarize
+
         # Add any additional kwargs
         # Convert boolean values to lowercase strings for proper form encoding
         for key, value in kwargs.items():
@@ -271,6 +288,7 @@ class AsyncTranscriptions:
         if (
             response_format == "verbose_json"
             or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
+            or diarize
         ):
             # Create response with model validation that preserves extra fields
             return AudioTranscriptionVerboseResponse.model_validate(response.data)
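
Per the docstring, each entry in `words` carries a speaker id and `speaker_segments` groups words with start/end times. A hedged sketch of grouping by speaker; the field names come from the docstring, but the exact response structure, `file` parameter, and model id are assumptions:

    from collections import defaultdict
    from together import Together

    client = Together()
    resp = client.audio.transcriptions.create(
        file="meeting.wav",                # illustrative input file
        model="openai/whisper-large-v3",   # illustrative model id
        diarize=True,
    )

    lines: dict[str, list[str]] = defaultdict(list)
    for word in resp.words:                # speaker id per word, per the docstring
        lines[word["speaker_id"]].append(word["word"])
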
together/resources/audio/voices.py ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    TogetherClient,
+    TogetherRequest,
+    VoiceListResponse,
+)
+
+
+class Voices:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def list(self) -> VoiceListResponse:
+        """
+        Method to return list of available voices on the API
+
+        Returns:
+            VoiceListResponse: Response containing models and their available voices
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="voices",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return VoiceListResponse(**response.data)
+
+
+class AsyncVoices:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def list(self) -> VoiceListResponse:
+        """
+        Async method to return list of available voices on the API
+
+        Returns:
+            VoiceListResponse: Response containing models and their available voices
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="voices",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return VoiceListResponse(**response.data)
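
Wired together with the new `Audio.voices` property above, listing voices is one call; a quick sketch:

    from together import Together

    # Per this diff: GET /voices, parsed into a VoiceListResponse.
    voices = Together().audio.voices.list()
    print(voices)
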