together-1.4.0-py3-none-any.whl → together-1.4.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/abstract/api_requestor.py +7 -9
- together/cli/api/endpoints.py +415 -0
- together/cli/api/finetune.py +67 -5
- together/cli/cli.py +2 -0
- together/client.py +1 -0
- together/constants.py +6 -0
- together/error.py +3 -0
- together/legacy/finetune.py +1 -1
- together/resources/__init__.py +4 -1
- together/resources/endpoints.py +488 -0
- together/resources/finetune.py +173 -15
- together/types/__init__.py +25 -20
- together/types/chat_completions.py +6 -0
- together/types/endpoints.py +123 -0
- together/types/finetune.py +45 -0
- together/utils/__init__.py +4 -0
- together/utils/files.py +139 -66
- together/utils/tools.py +53 -2
- {together-1.4.0.dist-info → together-1.4.4.dist-info}/METADATA +93 -23
- {together-1.4.0.dist-info → together-1.4.4.dist-info}/RECORD +23 -20
- {together-1.4.0.dist-info → together-1.4.4.dist-info}/WHEEL +1 -1
- {together-1.4.0.dist-info → together-1.4.4.dist-info}/LICENSE +0 -0
- {together-1.4.0.dist-info → together-1.4.4.dist-info}/entry_points.txt +0 -0
together/abstract/api_requestor.py
CHANGED

@@ -437,7 +437,7 @@ class APIRequestor:
                 [(k, v) for k, v in options.params.items() if v is not None]
             )
             abs_url = _build_api_url(abs_url, encoded_params)
-        elif options.method.lower() in {"post", "put"}:
+        elif options.method.lower() in {"post", "put", "patch"}:
             if options.params and (options.files or options.override_headers):
                 data = options.params
             elif options.params and not options.files:
@@ -587,16 +587,14 @@ class APIRequestor:
             )
             headers["Content-Type"] = content_type

-            request_kwargs = {
-                "headers": headers,
-                "data": data,
-                "timeout": timeout,
-                "allow_redirects": options.allow_redirects,
-            }
-
             try:
                 result = await session.request(
-                    method=options.method,
+                    method=options.method,
+                    url=abs_url,
+                    headers=headers,
+                    data=data,
+                    timeout=timeout,
+                    allow_redirects=options.allow_redirects,
                 )
                 utils.log_debug(
                     "Together API response",
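The second hunk above inlines the request kwargs, and the first adds "patch" to the body-carrying verbs, presumably so the new endpoints resource can issue HTTP PATCH updates. A minimal sketch of the routing rule being changed, using hypothetical names rather than the SDK's internals:

from urllib.parse import urlencode

def route_params(method: str, url: str, params: dict | None):
    # Sketch only: GET/DELETE params are URL-encoded into a query string;
    # POST/PUT (and, as of 1.4.4, PATCH) params travel as the request body.
    if params and method.lower() in {"get", "delete"}:
        query = urlencode([(k, v) for k, v in params.items() if v is not None])
        return f"{url}?{query}", None
    if method.lower() in {"post", "put", "patch"}:
        return url, params
    return url, None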
together/cli/api/endpoints.py
ADDED

@@ -0,0 +1,415 @@
+from __future__ import annotations
+
+import json
+import sys
+from functools import wraps
+from typing import Any, Callable, Dict, List, Literal, TypeVar, Union
+
+import click
+
+from together import Together
+from together.error import InvalidRequestError
+from together.types import DedicatedEndpoint, ListEndpoint
+
+
+def print_endpoint(
+    endpoint: Union[DedicatedEndpoint, ListEndpoint],
+) -> None:
+    """Print endpoint details in a Docker-like format or JSON."""
+
+    # Print header info
+    click.echo(f"ID:\t\t{endpoint.id}")
+    click.echo(f"Name:\t\t{endpoint.name}")
+
+    # Print type-specific fields
+    if isinstance(endpoint, DedicatedEndpoint):
+        click.echo(f"Display Name:\t{endpoint.display_name}")
+        click.echo(f"Hardware:\t{endpoint.hardware}")
+        click.echo(
+            f"Autoscaling:\tMin={endpoint.autoscaling.min_replicas}, "
+            f"Max={endpoint.autoscaling.max_replicas}"
+        )
+
+    click.echo(f"Model:\t\t{endpoint.model}")
+    click.echo(f"Type:\t\t{endpoint.type}")
+    click.echo(f"Owner:\t\t{endpoint.owner}")
+    click.echo(f"State:\t\t{endpoint.state}")
+    click.echo(f"Created:\t{endpoint.created_at}")
+
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def print_api_error(
+    e: InvalidRequestError,
+) -> None:
+    error_details = e.api_response.message
+
+    if error_details and (
+        "credentials" in error_details.lower()
+        or "authentication" in error_details.lower()
+    ):
+        click.echo("Error: Invalid API key or authentication failed", err=True)
+    else:
+        click.echo(f"Error: {error_details}", err=True)
+
+
+def handle_api_errors(f: F) -> F:
+    """Decorator to handle common API errors in CLI commands."""
+
+    @wraps(f)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        try:
+            return f(*args, **kwargs)
+        except InvalidRequestError as e:
+            print_api_error(e)
+            sys.exit(1)
+        except Exception as e:
+            click.echo(f"Error: An unexpected error occurred - {str(e)}", err=True)
+            sys.exit(1)
+
+    return wrapper  # type: ignore
+
+
+@click.group()
+@click.pass_context
+def endpoints(ctx: click.Context) -> None:
+    """Endpoints API commands"""
+    pass
+
+
+@endpoints.command()
+@click.option(
+    "--model",
+    required=True,
+    help="The model to deploy (e.g. mistralai/Mixtral-8x7B-Instruct-v0.1)",
+)
+@click.option(
+    "--min-replicas",
+    type=int,
+    default=1,
+    help="Minimum number of replicas to deploy",
+)
+@click.option(
+    "--max-replicas",
+    type=int,
+    default=1,
+    help="Maximum number of replicas to deploy",
+)
+@click.option(
+    "--gpu",
+    type=click.Choice(["h100", "a100", "l40", "l40s", "rtx-6000"]),
+    required=True,
+    help="GPU type to use for inference",
+)
+@click.option(
+    "--gpu-count",
+    type=int,
+    default=1,
+    help="Number of GPUs to use per replica",
+)
+@click.option(
+    "--display-name",
+    help="A human-readable name for the endpoint",
+)
+@click.option(
+    "--no-prompt-cache",
+    is_flag=True,
+    help="Disable the prompt cache for this endpoint",
+)
+@click.option(
+    "--no-speculative-decoding",
+    is_flag=True,
+    help="Disable speculative decoding for this endpoint",
+)
+@click.option(
+    "--no-auto-start",
+    is_flag=True,
+    help="Create the endpoint in STOPPED state instead of auto-starting it",
+)
+@click.option(
+    "--wait",
+    is_flag=True,
+    default=True,
+    help="Wait for the endpoint to be ready after creation",
+)
+@click.pass_obj
+@handle_api_errors
+def create(
+    client: Together,
+    model: str,
+    min_replicas: int,
+    max_replicas: int,
+    gpu: str,
+    gpu_count: int,
+    display_name: str | None,
+    no_prompt_cache: bool,
+    no_speculative_decoding: bool,
+    no_auto_start: bool,
+    wait: bool,
+) -> None:
+    """Create a new dedicated inference endpoint."""
+    # Map GPU types to their full hardware ID names
+    gpu_map = {
+        "h100": "nvidia_h100_80gb_sxm",
+        "a100": "nvidia_a100_80gb_pcie" if gpu_count == 1 else "nvidia_a100_80gb_sxm",
+        "l40": "nvidia_l40",
+        "l40s": "nvidia_l40s",
+        "rtx-6000": "nvidia_rtx_6000_ada",
+    }
+
+    hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"
+
+    try:
+        response = client.endpoints.create(
+            model=model,
+            hardware=hardware_id,
+            min_replicas=min_replicas,
+            max_replicas=max_replicas,
+            display_name=display_name,
+            disable_prompt_cache=no_prompt_cache,
+            disable_speculative_decoding=no_speculative_decoding,
+            state="STOPPED" if no_auto_start else "STARTED",
+        )
+    except InvalidRequestError as e:
+        print_api_error(e)
+        if "check the hardware api" in str(e).lower():
+            fetch_and_print_hardware_options(
+                client=client, model=model, print_json=False, available=True
+            )
+
+        sys.exit(1)
+
+    # Print detailed information to stderr
+    click.echo("Created dedicated endpoint with:", err=True)
+    click.echo(f"  Model: {model}", err=True)
+    click.echo(f"  Min replicas: {min_replicas}", err=True)
+    click.echo(f"  Max replicas: {max_replicas}", err=True)
+    click.echo(f"  Hardware: {hardware_id}", err=True)
+    if display_name:
+        click.echo(f"  Display name: {display_name}", err=True)
+    if no_prompt_cache:
+        click.echo("  Prompt cache: disabled", err=True)
+    if no_speculative_decoding:
+        click.echo("  Speculative decoding: disabled", err=True)
+    if no_auto_start:
+        click.echo("  Auto-start: disabled", err=True)
+
+    click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to be ready...", err=True)
+        while client.endpoints.get(response.id).state != "STARTED":
+            time.sleep(1)
+        click.echo("Endpoint ready", err=True)
+
+    # Print only the endpoint ID to stdout
+    click.echo(response.id)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.pass_obj
+@handle_api_errors
+def get(client: Together, endpoint_id: str, json: bool) -> None:
+    """Get a dedicated inference endpoint."""
+    endpoint = client.endpoints.get(endpoint_id)
+    if json:
+        import json as json_lib
+
+        click.echo(json_lib.dumps(endpoint.model_dump(), indent=2))
+    else:
+        print_endpoint(endpoint)
+
+
+@endpoints.command()
+@click.option("--model", help="Filter hardware options by model")
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.option(
+    "--available",
+    is_flag=True,
+    help="Print only available hardware options (can only be used if model is passed in)",
+)
+@click.pass_obj
+@handle_api_errors
+def hardware(client: Together, model: str | None, json: bool, available: bool) -> None:
+    """List all available hardware options, optionally filtered by model."""
+    fetch_and_print_hardware_options(client, model, json, available)
+
+
+def fetch_and_print_hardware_options(
+    client: Together, model: str | None, print_json: bool, available: bool
+) -> None:
+    """Print hardware options for a model."""
+
+    message = "Available hardware options:" if available else "All hardware options:"
+    click.echo(message, err=True)
+    hardware_options = client.endpoints.list_hardware(model)
+    if available:
+        hardware_options = [
+            hardware
+            for hardware in hardware_options
+            if hardware.availability is not None
+            and hardware.availability.status == "available"
+        ]
+
+    if print_json:
+        json_output = [hardware.model_dump() for hardware in hardware_options]
+        click.echo(json.dumps(json_output, indent=2))
+    else:
+        for hardware in hardware_options:
+            click.echo(f"  {hardware.id}", err=True)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option(
+    "--wait", is_flag=True, default=True, help="Wait for the endpoint to stop"
+)
+@click.pass_obj
+@handle_api_errors
+def stop(client: Together, endpoint_id: str, wait: bool) -> None:
+    """Stop a dedicated inference endpoint."""
+    client.endpoints.update(endpoint_id, state="STOPPED")
+    click.echo("Successfully marked endpoint as stopping", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to stop...", err=True)
+        while client.endpoints.get(endpoint_id).state != "STOPPED":
+            time.sleep(1)
+        click.echo("Endpoint stopped", err=True)
+
+    click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option(
+    "--wait", is_flag=True, default=True, help="Wait for the endpoint to start"
+)
+@click.pass_obj
+@handle_api_errors
+def start(client: Together, endpoint_id: str, wait: bool) -> None:
+    """Start a dedicated inference endpoint."""
+    client.endpoints.update(endpoint_id, state="STARTED")
+    click.echo("Successfully marked endpoint as starting", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to start...", err=True)
+        while client.endpoints.get(endpoint_id).state != "STARTED":
+            time.sleep(1)
+        click.echo("Endpoint started", err=True)
+
+    click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.pass_obj
+@handle_api_errors
+def delete(client: Together, endpoint_id: str) -> None:
+    """Delete a dedicated inference endpoint."""
+    client.endpoints.delete(endpoint_id)
+    click.echo("Successfully deleted endpoint", err=True)
+    click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.option(
+    "--type",
+    type=click.Choice(["dedicated", "serverless"]),
+    help="Filter by endpoint type",
+)
+@click.pass_obj
+@handle_api_errors
+def list(
+    client: Together, json: bool, type: Literal["dedicated", "serverless"] | None
+) -> None:
+    """List all inference endpoints (includes both dedicated and serverless endpoints)."""
+    endpoints: List[ListEndpoint] = client.endpoints.list(type=type)
+
+    if not endpoints:
+        click.echo("No dedicated endpoints found", err=True)
+        return
+
+    click.echo("Endpoints:", err=True)
+    if json:
+        import json as json_lib
+
+        click.echo(
+            json_lib.dumps([endpoint.model_dump() for endpoint in endpoints], indent=2)
+        )
+    else:
+        for endpoint in endpoints:
+            print_endpoint(
+                endpoint,
+            )
+            click.echo()
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option(
+    "--display-name",
+    help="A new human-readable name for the endpoint",
+)
+@click.option(
+    "--min-replicas",
+    type=int,
+    help="New minimum number of replicas to maintain",
+)
+@click.option(
+    "--max-replicas",
+    type=int,
+    help="New maximum number of replicas to scale up to",
+)
+@click.pass_obj
+@handle_api_errors
+def update(
+    client: Together,
+    endpoint_id: str,
+    display_name: str | None,
+    min_replicas: int | None,
+    max_replicas: int | None,
+) -> None:
+    """Update a dedicated inference endpoint's configuration."""
+    if not any([display_name, min_replicas, max_replicas]):
+        click.echo("Error: At least one update option must be specified", err=True)
+        sys.exit(1)
+
+    # If only one of min/max replicas is specified, we need both for the update
+    if (min_replicas is None) != (max_replicas is None):
+        click.echo(
+            "Error: Both --min-replicas and --max-replicas must be specified together",
+            err=True,
+        )
+        sys.exit(1)
+
+    # Build kwargs for the update
+    kwargs: Dict[str, Any] = {}
+    if display_name is not None:
+        kwargs["display_name"] = display_name
+    if min_replicas is not None and max_replicas is not None:
+        kwargs["min_replicas"] = min_replicas
+        kwargs["max_replicas"] = max_replicas
+
+    _response = client.endpoints.update(endpoint_id, **kwargs)
+
+    # Print what was updated
+    click.echo("Updated endpoint configuration:", err=True)
+    if display_name:
+        click.echo(f"  Display name: {display_name}", err=True)
+    if min_replicas is not None and max_replicas is not None:
+        click.echo(f"  Min replicas: {min_replicas}", err=True)
+        click.echo(f"  Max replicas: {max_replicas}", err=True)
+
+    click.echo("Successfully updated endpoint", err=True)
+    click.echo(endpoint_id)
together/cli/api/finetune.py
CHANGED
@@ -1,9 +1,10 @@
 from __future__ import annotations

 import json
-from datetime import datetime
+from datetime import datetime, timezone
 from textwrap import wrap
 from typing import Any, Literal
+import re

 import click
 from click.core import ParameterSource  # type: ignore[attr-defined]
@@ -17,8 +18,13 @@ from together.utils import (
     log_warn,
     log_warn_once,
     parse_timestamp,
+    format_timestamp,
+)
+from together.types.finetune import (
+    DownloadCheckpointType,
+    FinetuneTrainingLimits,
+    FinetuneEventType,
 )
-from together.types.finetune import DownloadCheckpointType, FinetuneTrainingLimits


 _CONFIRMATION_MESSAGE = (
@@ -104,6 +110,18 @@ def fine_tuning(ctx: click.Context) -> None:
     default="all-linear",
     help="Trainable modules for LoRA adapters. For example, 'all-linear', 'q_proj,v_proj'",
 )
+@click.option(
+    "--training-method",
+    type=click.Choice(["sft", "dpo"]),
+    default="sft",
+    help="Training method to use. Options: sft (supervised fine-tuning), dpo (Direct Preference Optimization)",
+)
+@click.option(
+    "--dpo-beta",
+    type=float,
+    default=0.1,
+    help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix", type=str, default=None, help="Suffix for the fine-tuned model name"
 )
@@ -126,6 +144,14 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Whether to mask the user messages in conversational data or prompts in instruction data. "
     "`auto` will automatically determine whether to mask the inputs based on the data format.",
 )
+@click.option(
+    "--from-checkpoint",
+    type=str,
+    default=None,
+    help="The checkpoint identifier to continue training from a previous fine-tuning job. "
+    "The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. "
+    "The step value is optional, without it the final checkpoint will be used.",
+)
 def create(
     ctx: click.Context,
     training_file: str,
@@ -152,6 +178,9 @@ def create(
     wandb_name: str,
     confirm: bool,
     train_on_inputs: bool | Literal["auto"],
+    training_method: str,
+    dpo_beta: float,
+    from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
     client: Together = ctx.obj
@@ -180,6 +209,9 @@ def create(
         wandb_project_name=wandb_project_name,
         wandb_name=wandb_name,
         train_on_inputs=train_on_inputs,
+        training_method=training_method,
+        dpo_beta=dpo_beta,
+        from_checkpoint=from_checkpoint,
     )

     model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits(
@@ -261,7 +293,9 @@ def list(ctx: click.Context) -> None:

     response.data = response.data or []

-
+    # Use a default datetime for None values to make sure the key function always returns a comparable value
+    epoch_start = datetime.fromtimestamp(0, tz=timezone.utc)
+    response.data.sort(key=lambda x: parse_timestamp(x.created_at or "") or epoch_start)

     display_list = []
     for i in response.data:
@@ -344,6 +378,34 @@ def list_events(ctx: click.Context, fine_tune_id: str) -> None:
     click.echo(table)


+@fine_tuning.command()
+@click.pass_context
+@click.argument("fine_tune_id", type=str, required=True)
+def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
+    """List available checkpoints for a fine-tuning job"""
+    client: Together = ctx.obj
+
+    checkpoints = client.fine_tuning.list_checkpoints(fine_tune_id)
+
+    display_list = []
+    for checkpoint in checkpoints:
+        display_list.append(
+            {
+                "Type": checkpoint.type,
+                "Timestamp": format_timestamp(checkpoint.timestamp),
+                "Name": checkpoint.name,
+            }
+        )
+
+    if display_list:
+        click.echo(f"Job {fine_tune_id} contains the following checkpoints:")
+        table = tabulate(display_list, headers="keys", tablefmt="grid")
+        click.echo(table)
+        click.echo("\nTo download a checkpoint, use `together fine-tuning download`")
+    else:
+        click.echo(f"No checkpoints found for job {fine_tune_id}")
+
+
 @fine_tuning.command()
 @click.pass_context
 @click.argument("fine_tune_id", type=str, required=True)
@@ -358,7 +420,7 @@ def list_events(ctx: click.Context, fine_tune_id: str) -> None:
     "--checkpoint-step",
     type=int,
     required=False,
-    default
+    default=None,
     help="Download fine-tuning checkpoint. Defaults to latest.",
 )
 @click.option(
@@ -372,7 +434,7 @@ def download(
     ctx: click.Context,
     fine_tune_id: str,
     output_dir: str,
-    checkpoint_step: int,
+    checkpoint_step: int | None,
     checkpoint_type: DownloadCheckpointType,
 ) -> None:
     """Download fine-tuning checkpoint"""
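These options pass straight through to client.fine_tuning.create, so the Python equivalent of the extended CLI is roughly the following. The file ID, model name, and checkpoint step are placeholders, and the exact required parameters follow the SDK's create signature:

from together import Together

client = Together()

# Equivalent of `together fine-tuning create --training-method dpo --dpo-beta 0.1 ...`
job = client.fine_tuning.create(
    training_file="file-xxxxxxxx",  # placeholder upload ID
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder
    training_method="dpo",
    dpo_beta=0.1,
)

# New in 1.4.4: inspect checkpoints, then resume training from one.
for checkpoint in client.fine_tuning.list_checkpoints(job.id):
    print(checkpoint.type, checkpoint.name)

resumed = client.fine_tuning.create(
    training_file="file-xxxxxxxx",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    from_checkpoint=f"{job.id}:10",  # {$JOB_ID}:{$STEP}; the step is optional
)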
together/cli/cli.py
CHANGED
@@ -8,6 +8,7 @@ import click
 import together
 from together.cli.api.chat import chat, interactive
 from together.cli.api.completions import completions
+from together.cli.api.endpoints import endpoints
 from together.cli.api.files import files
 from together.cli.api.finetune import fine_tuning
 from together.cli.api.images import images

@@ -72,6 +73,7 @@ main.add_command(images)
 main.add_command(files)
 main.add_command(fine_tuning)
 main.add_command(models)
+main.add_command(endpoints)

 if __name__ == "__main__":
     main()
together/client.py
CHANGED
together/constants.py
CHANGED
@@ -39,12 +39,18 @@ class DatasetFormat(enum.Enum):
     GENERAL = "general"
     CONVERSATION = "conversation"
     INSTRUCTION = "instruction"
+    PREFERENCE_OPENAI = "preference_openai"


 JSONL_REQUIRED_COLUMNS_MAP = {
     DatasetFormat.GENERAL: ["text"],
     DatasetFormat.CONVERSATION: ["messages"],
     DatasetFormat.INSTRUCTION: ["prompt", "completion"],
+    DatasetFormat.PREFERENCE_OPENAI: [
+        "input",
+        "preferred_output",
+        "non_preferred_output",
+    ],
 }
 REQUIRED_COLUMNS_MESSAGE = ["role", "content"]
 POSSIBLE_ROLES_CONVERSATION = ["system", "user", "assistant"]
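Per JSONL_REQUIRED_COLUMNS_MAP, a preference_openai dataset row must carry exactly these three top-level columns. A sketch of writing one such row — the nested message shapes are an assumption (only the column names appear in this diff):

import json

row = {
    # Only these three top-level keys come from the diff; the values below
    # assume OpenAI-style preference data and are illustrative.
    "input": {
        "messages": [{"role": "user", "content": "What is the capital of France?"}]
    },
    "preferred_output": [{"role": "assistant", "content": "Paris."}],
    "non_preferred_output": [{"role": "assistant", "content": "London."}],
}

with open("preference_data.jsonl", "w") as f:
    f.write(json.dumps(row) + "\n")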
together/error.py
CHANGED
@@ -18,6 +18,9 @@ class TogetherException(Exception):
         request_id: str | None = None,
         http_status: int | None = None,
     ) -> None:
+        if isinstance(message, TogetherErrorResponse):
+            self.api_response = message
+
         _message = (
             json.dumps(message.model_dump(exclude_none=True))
             if isinstance(message, TogetherErrorResponse)
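Keeping the parsed error body on the exception is what lets the new CLI's print_api_error read e.api_response.message. A minimal caller-side sketch (the endpoint ID is a placeholder):

from together import Together
from together.error import InvalidRequestError

client = Together()

try:
    client.endpoints.get("endpoint-id-that-does-not-exist")  # placeholder ID
except InvalidRequestError as e:
    # New in 1.4.4: the structured TogetherErrorResponse is preserved here.
    print(e.api_response.message)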
together/legacy/finetune.py
CHANGED
together/resources/__init__.py
CHANGED
@@ -1,12 +1,13 @@
+from together.resources.audio import AsyncAudio, Audio
 from together.resources.chat import AsyncChat, Chat
 from together.resources.completions import AsyncCompletions, Completions
 from together.resources.embeddings import AsyncEmbeddings, Embeddings
+from together.resources.endpoints import AsyncEndpoints, Endpoints
 from together.resources.files import AsyncFiles, Files
 from together.resources.finetune import AsyncFineTuning, FineTuning
 from together.resources.images import AsyncImages, Images
 from together.resources.models import AsyncModels, Models
 from together.resources.rerank import AsyncRerank, Rerank
-from together.resources.audio import AsyncAudio, Audio


 __all__ = [

@@ -28,4 +29,6 @@ __all__ = [
     "Rerank",
     "AsyncAudio",
     "Audio",
+    "AsyncEndpoints",
+    "Endpoints",
 ]