together 2.0.0a15__py3-none-any.whl → 2.0.0a17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/_base_client.py +134 -11
- together/_client.py +7 -0
- together/_models.py +16 -1
- together/_types.py +9 -0
- together/_version.py +1 -1
- together/constants.py +1 -1
- together/error.py +1 -2
- together/lib/_google_colab.py +39 -0
- together/lib/cli/__init__.py +73 -0
- together/lib/cli/api/beta/clusters/__init__.py +47 -0
- together/lib/cli/api/beta/{clusters.py → clusters/create.py} +5 -179
- together/lib/cli/api/beta/clusters/delete.py +35 -0
- together/lib/cli/api/beta/clusters/get_credentials.py +151 -0
- together/lib/cli/api/beta/clusters/list.py +24 -0
- together/lib/cli/api/beta/clusters/list_regions.py +37 -0
- together/lib/cli/api/beta/clusters/retrieve.py +31 -0
- together/lib/cli/api/beta/clusters/storage/__init__.py +19 -0
- together/lib/cli/api/beta/clusters/storage/create.py +49 -0
- together/lib/cli/api/beta/clusters/storage/delete.py +38 -0
- together/lib/cli/api/beta/clusters/storage/list.py +42 -0
- together/lib/cli/api/beta/clusters/storage/retrieve.py +34 -0
- together/lib/cli/api/beta/clusters/update.py +54 -0
- together/lib/cli/api/endpoints/__init__.py +56 -0
- together/lib/cli/api/endpoints/availability_zones.py +26 -0
- together/lib/cli/api/endpoints/create.py +161 -0
- together/lib/cli/api/endpoints/delete.py +15 -0
- together/lib/cli/api/endpoints/hardware.py +40 -0
- together/lib/cli/api/endpoints/list.py +66 -0
- together/lib/cli/api/endpoints/retrieve.py +23 -0
- together/lib/cli/api/endpoints/start.py +25 -0
- together/lib/cli/api/endpoints/stop.py +25 -0
- together/lib/cli/api/endpoints/update.py +77 -0
- together/lib/cli/api/evals/__init__.py +19 -0
- together/lib/cli/api/{evals.py → evals/create.py} +6 -129
- together/lib/cli/api/evals/list.py +58 -0
- together/lib/cli/api/evals/retrieve.py +21 -0
- together/lib/cli/api/evals/status.py +20 -0
- together/lib/cli/api/files/__init__.py +23 -0
- together/lib/cli/api/files/check.py +21 -0
- together/lib/cli/api/files/delete.py +20 -0
- together/lib/cli/api/files/list.py +34 -0
- together/lib/cli/api/files/retrieve.py +20 -0
- together/lib/cli/api/files/retrieve_content.py +25 -0
- together/lib/cli/api/files/upload.py +38 -0
- together/lib/cli/api/fine_tuning/__init__.py +27 -0
- together/lib/cli/api/fine_tuning/cancel.py +28 -0
- together/lib/cli/api/{fine_tuning.py → fine_tuning/create.py} +5 -257
- together/lib/cli/api/fine_tuning/delete.py +29 -0
- together/lib/cli/api/fine_tuning/download.py +94 -0
- together/lib/cli/api/fine_tuning/list.py +44 -0
- together/lib/cli/api/fine_tuning/list_checkpoints.py +42 -0
- together/lib/cli/api/fine_tuning/list_events.py +35 -0
- together/lib/cli/api/fine_tuning/retrieve.py +27 -0
- together/lib/cli/api/models/__init__.py +15 -0
- together/lib/cli/api/models/list.py +55 -0
- together/lib/cli/api/{models.py → models/upload.py} +4 -51
- together/resources/beta/clusters/clusters.py +36 -28
- together/resources/beta/clusters/storage.py +30 -21
- together/resources/endpoints.py +2 -2
- together/types/__init__.py +4 -3
- together/types/beta/__init__.py +0 -2
- together/types/beta/cluster_create_params.py +3 -3
- together/types/beta/clusters/__init__.py +0 -1
- together/types/beta/clusters/cluster_storage.py +4 -0
- together/types/chat_completions.py +1 -1
- together/types/endpoint_create_params.py +1 -1
- together/types/endpoints.py +1 -1
- {together-2.0.0a15.dist-info → together-2.0.0a17.dist-info}/METADATA +4 -2
- {together-2.0.0a15.dist-info → together-2.0.0a17.dist-info}/RECORD +74 -38
- together-2.0.0a17.dist-info/entry_points.txt +2 -0
- together/lib/cli/api/__init__.py +0 -0
- together/lib/cli/api/beta/clusters_storage.py +0 -152
- together/lib/cli/api/endpoints.py +0 -467
- together/lib/cli/api/files.py +0 -133
- together/lib/cli/cli.py +0 -73
- together/types/beta/cluster_create_response.py +0 -9
- together/types/beta/cluster_update_response.py +0 -9
- together/types/beta/clusters/storage_create_response.py +0 -9
- together-2.0.0a15.dist-info/entry_points.txt +0 -2
- /together/lib/cli/api/{utils.py → _utils.py} +0 -0
- /together/lib/cli/api/beta/{beta.py → __init__.py} +0 -0
- {together-2.0.0a15.dist-info → together-2.0.0a17.dist-info}/WHEEL +0 -0
- {together-2.0.0a15.dist-info → together-2.0.0a17.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json as json_lib
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from together import Together, omit
|
|
9
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.argument("cluster-id", required=True)
@click.option(
    "--num-gpus",
    type=int,
    help="Number of GPUs to allocate in the cluster",
)
@click.option(
    "--cluster-type",
    type=click.Choice(["KUBERNETES", "SLURM"]),
    help="Cluster type",
)
@click.option(
    "--json",
    is_flag=True,
    help="Output in JSON format",
)
@click.pass_context
@handle_api_errors("Clusters")
def update(
    ctx: click.Context,
    cluster_id: str,
    num_gpus: int | None = None,
    cluster_type: Literal["KUBERNETES", "SLURM"] | None = None,
    json: bool = False,
) -> None:
    """Update a cluster"""
    # The Together client is carried on the click context object.
    client: Together = ctx.obj
    human_readable = not json

    if human_readable:
        click.echo("Clusters: Updating cluster...")

    # Options the user did not pass are replaced by the `omit` sentinel
    # rather than being forwarded as None.
    gpus_arg = omit if num_gpus is None else num_gpus
    type_arg = omit if cluster_type is None else cluster_type
    client.beta.clusters.update(
        cluster_id,
        num_gpus=gpus_arg,
        cluster_type=type_arg,
    )

    if human_readable:
        click.echo("Clusters: Done")
        return

    # JSON mode: fetch the cluster again and dump it, skipping None fields.
    cluster = client.beta.clusters.retrieve(cluster_id)
    click.echo(json_lib.dumps(cluster.model_dump(exclude_none=True), indent=4))
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from together.types import DedicatedEndpoint
|
|
6
|
+
from together.types.endpoint_list_response import Data as DedicatedEndpointListItem
|
|
7
|
+
|
|
8
|
+
from .list import list
|
|
9
|
+
from .stop import stop
|
|
10
|
+
from .start import start
|
|
11
|
+
from .create import create
|
|
12
|
+
from .delete import delete
|
|
13
|
+
from .update import update
|
|
14
|
+
from .hardware import hardware
|
|
15
|
+
from .retrieve import retrieve
|
|
16
|
+
from .availability_zones import availability_zones
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@click.group()
@click.pass_context
def endpoints(ctx: click.Context) -> None:
    """Endpoints API commands"""

    def print_endpoint(endpoint: DedicatedEndpoint | DedicatedEndpointListItem) -> None:
        """Echo a tab-aligned, human-readable summary of one endpoint."""

        def rows():
            # Rows are produced lazily so each attribute is read right before
            # its line is echoed.
            yield f"ID:\t\t{endpoint.id}"
            yield f"Name:\t\t{endpoint.name}"

            # These fields are only read for the full DedicatedEndpoint
            # model, not for list items.
            if isinstance(endpoint, DedicatedEndpoint):
                yield f"Display Name:\t{endpoint.display_name}"
                yield f"Hardware:\t{endpoint.hardware}"
                scaling = endpoint.autoscaling
                yield f"Autoscaling:\tMin={scaling.min_replicas}, Max={scaling.max_replicas}"

            yield f"Model:\t\t{endpoint.model}"
            yield f"Type:\t\t{endpoint.type}"
            yield f"Owner:\t\t{endpoint.owner}"
            yield f"State:\t\t{endpoint.state}"
            yield f"Created:\t{endpoint.created_at}"

        for row in rows():
            click.echo(row)

    # Attach the printer to the context object so subcommands can call
    # `ctx.obj.print_endpoint(...)`.
    ctx.obj.print_endpoint = print_endpoint
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Register every subcommand on the `endpoints` group, in the original order.
for _command in (
    hardware,
    create,
    retrieve,
    stop,
    start,
    delete,
    list,
    update,
    availability_zones,
):
    endpoints.add_command(_command)
del _command  # do not leave the loop variable behind as a module attribute
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from together import Together
|
|
4
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@click.command()
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.pass_obj
@handle_api_errors("Endpoints")
def availability_zones(client: Together, json: bool) -> None:
    """List all availability zones."""
    avzones = client.endpoints.list_avzones()

    # The response is a wrapper object whose `avzones` attribute holds the
    # actual list; testing only the wrapper does not detect an empty list
    # (presumably the wrapper is always truthy — TODO confirm). In text mode
    # we therefore also look at the list itself. JSON mode is left untouched
    # so consumers still receive a JSON document when the list is empty.
    if not avzones or (not json and not avzones.avzones):
        click.echo("No availability zones found", err=True)
        return

    if json:
        # Imported under an alias because the `json` parameter shadows the module.
        import json as json_lib

        click.echo(json_lib.dumps(avzones.model_dump(), indent=2))
    else:
        # Header goes to stderr; the zone names themselves go to stdout.
        click.echo("Available zones:", err=True)
        for availability_zone in sorted(avzones.avzones):
            click.echo(f" {availability_zone}")
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
from rich import print
|
|
7
|
+
|
|
8
|
+
from together import APIError, Together, omit
|
|
9
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
10
|
+
|
|
11
|
+
from .hardware import hardware
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.command()
@click.option(
    "--model",
    required=True,
    help="The model to deploy (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct)",
)
@click.option(
    "--min-replicas",
    type=int,
    default=1,
    help="Minimum number of replicas to deploy",
)
@click.option(
    "--max-replicas",
    type=int,
    default=1,
    help="Maximum number of replicas to deploy",
)
@click.option(
    "--gpu",
    type=click.Choice(["b200", "h200", "h100", "a100", "l40", "l40s", "rtx-6000"]),
    required=True,
    help="GPU type to use for inference",
)
@click.option(
    "--gpu-count",
    type=int,
    default=1,
    help="Number of GPUs to use per replica",
)
@click.option(
    "--display-name",
    help="A human-readable name for the endpoint",
)
@click.option(
    "--no-prompt-cache",
    is_flag=True,
    help="Deprecated and no longer has any effect.",
)
@click.option(
    "--no-speculative-decoding",
    is_flag=True,
    help="Disable speculative decoding for this endpoint",
)
@click.option(
    "--no-auto-start",
    is_flag=True,
    help="Create the endpoint in STOPPED state instead of auto-starting it",
)
@click.option(
    "--inactive-timeout",
    type=int,
    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
)
@click.option(
    "--availability-zone",
    help="Start endpoint in specified availability zone (e.g., us-central-4b)",
)
@click.option(
    "--wait/--no-wait",
    default=True,
    help="Wait for the endpoint to be ready after creation",
)
@click.pass_context
@handle_api_errors("Endpoints")
def create(
    ctx: click.Context,
    model: str,
    min_replicas: int,
    max_replicas: int,
    gpu: str,
    gpu_count: int,
    display_name: str | None,
    no_prompt_cache: bool | None,
    no_speculative_decoding: bool | None,
    no_auto_start: bool,
    inactive_timeout: int | None,
    availability_zone: str | None,
    wait: bool,
) -> None:
    """Create a new dedicated inference endpoint."""
    client: Together = ctx.obj
    # Map GPU types to their full hardware ID names
    gpu_map = {
        "b200": "nvidia_b200_180gb_sxm",
        "h200": "nvidia_h200_140gb_sxm",
        "h100": "nvidia_h100_80gb_sxm",
        # A single A100 maps to the PCIe hardware ID, multiples to SXM.
        "a100": "nvidia_a100_80gb_pcie" if gpu_count == 1 else "nvidia_a100_80gb_sxm",
        "l40": "nvidia_l40",
        "l40s": "nvidia_l40s",
        "rtx-6000": "nvidia_rtx_6000_ada",
    }

    # Warn only when the deprecated flag was actually passed. Testing
    # `is not None` would also fire for False, i.e. when the flag is absent.
    if no_prompt_cache:
        click.echo("Warning: --no-prompt-cache is deprecated and no longer has any effect.", err=True)

    hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"

    try:
        response = client.endpoints.create(
            model=model,
            hardware=hardware_id,
            autoscaling={
                "min_replicas": min_replicas,
                "max_replicas": max_replicas,
            },
            display_name=display_name or omit,
            disable_speculative_decoding=no_speculative_decoding or omit,
            state="STOPPED" if no_auto_start else "STARTED",
            inactive_timeout=inactive_timeout,
            extra_query={"availability_zone": availability_zone or omit},
        )
    except APIError as e:
        error_text = str(e.args[0]).lower()
        if "check the hardware api" in error_text or "invalid hardware provided" in error_text:
            # Show the hardware that is available for this model, then fail.
            print("Invalid hardware provided")
            ctx.invoke(hardware, available=True, model=model, json=False)
            sys.exit(1)
        # Any other API error propagates with its original traceback.
        raise

    # Print detailed information to stderr
    click.echo("Created dedicated endpoint with:", err=True)
    click.echo(f" Model: {model}", err=True)
    click.echo(f" Min replicas: {min_replicas}", err=True)
    click.echo(f" Max replicas: {max_replicas}", err=True)
    click.echo(f" Hardware: {hardware_id}", err=True)
    if display_name:
        click.echo(f" Display name: {display_name}", err=True)
    if no_speculative_decoding:
        click.echo(" Speculative decoding: disabled", err=True)
    if no_auto_start:
        click.echo(" Auto-start: disabled", err=True)
    if inactive_timeout is not None:
        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
    if availability_zone:
        click.echo(f" Availability zone: {availability_zone}", err=True)

    click.echo(f"Endpoint created successfully, id: {response.id}", err=True)

    if wait:
        import time

        # Poll once per second until the endpoint reports STARTED.
        # NOTE(review): there is no timeout, and with --no-auto-start the
        # endpoint is created STOPPED, so this loop may never finish unless
        # --no-wait is also given — confirm intended behavior.
        click.echo("Waiting for endpoint to be ready...", err=True)
        while client.endpoints.retrieve(response.id).state != "STARTED":
            time.sleep(1)
        click.echo("Endpoint ready", err=True)

    # Print only the endpoint ID to stdout
    click.echo(response.id)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from together import Together
|
|
4
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@click.command()
@click.argument("endpoint-id", required=True)
@click.pass_obj
@handle_api_errors("Endpoints")
def delete(client: Together, endpoint_id: str) -> None:
    """Delete a dedicated inference endpoint."""
    endpoints_api = client.endpoints
    endpoints_api.delete(endpoint_id)

    # The confirmation goes to stderr; only the bare id is written to stdout.
    click.echo("Successfully deleted endpoint", err=True)
    click.echo(endpoint_id)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json as json_lib
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from together import Together, omit
|
|
8
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
9
|
+
from together.lib.utils.serializer import datetime_serializer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.option("--model", help="Filter hardware options by model")
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.option(
    "--available",
    is_flag=True,
    help="Print only available hardware options (can only be used if model is passed in)",
)
@click.pass_obj
@handle_api_errors("Endpoints")
def hardware(client: Together, model: str | None, json: bool, available: bool) -> None:
    """List all available hardware options, optionally filtered by model."""
    # The header is echoed to stderr before the API request is made.
    header = "Available hardware options:" if available else "All hardware options:"
    click.echo(header, err=True)

    # A missing/empty model is replaced by the `omit` sentinel.
    hardware_options = client.hardware.list(model=model or omit)

    if available:

        def _is_available(option) -> bool:
            # Keep entries that carry availability info with status "available".
            return option.availability is not None and option.availability.status == "available"

        # The filtered list replaces `data` on the response object in place.
        hardware_options.data = [option for option in hardware_options.data if _is_available(option)]

    if not json:
        # Text mode: one hardware id per line, written to stderr.
        for option in hardware_options.data:
            click.echo(f" {option.id}", err=True)
        return

    payload = [option.model_dump() for option in hardware_options.data]
    click.echo(json_lib.dumps(payload, default=datetime_serializer, indent=2))
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from together import Together, omit
|
|
8
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
9
|
+
from together.lib.utils.serializer import datetime_serializer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.option(
    "--type",
    type=click.Choice(["dedicated", "serverless"]),
    help="Filter by endpoint type",
)
@click.option(
    "--mine",
    type=click.BOOL,
    default=None,
    help="true (only mine), default=all",
)
@click.option(
    "--usage-type",
    type=click.Choice(["on-demand", "reserved"]),
    help="Filter by endpoint usage type",
)
@click.pass_context
@handle_api_errors("Endpoints")
def list(
    ctx: click.Context,
    json: bool,
    type: Literal["dedicated", "serverless"] | None,
    usage_type: Literal["on-demand", "reserved"] | None,
    mine: bool | None,
) -> None:
    """List all inference endpoints (includes both dedicated and serverless endpoints)."""
    client: Together = ctx.obj

    # Filters that were not supplied are replaced by the `omit` sentinel.
    # `mine` is compared against None so an explicit false is still sent.
    endpoints = client.endpoints.list(
        type=type or omit,
        usage_type=usage_type or omit,
        mine=mine if mine is not None else omit,
    )

    # The response is a wrapper object whose `data` attribute holds the
    # actual items; testing only the wrapper does not detect an empty result
    # (presumably the wrapper is always truthy — TODO confirm). In text mode
    # we therefore also look at the list itself. JSON mode is left untouched
    # so consumers still receive a JSON array when the result is empty.
    if not endpoints or (not json and not endpoints.data):
        click.echo("No dedicated endpoints found", err=True)
        return

    click.echo("Endpoints:", err=True)
    if json:
        # Imported under an alias because the `json` parameter shadows the module.
        import json as json_lib

        click.echo(
            json_lib.dumps(
                [endpoint.model_dump() for endpoint in endpoints.data], default=datetime_serializer, indent=2
            )
        )
    else:
        # `print_endpoint` is read from the context object; a blank line is
        # echoed after each endpoint.
        for endpoint in endpoints.data:
            ctx.obj.print_endpoint(
                endpoint,
            )
            click.echo()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from together import Together
|
|
4
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
5
|
+
from together.lib.utils.serializer import datetime_serializer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command()
@click.argument("endpoint-id", required=True)
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.pass_context
@handle_api_errors("Endpoints")
def retrieve(ctx: click.Context, endpoint_id: str, json: bool) -> None:
    """Get a dedicated inference endpoint."""
    client: Together = ctx.obj
    endpoint = client.endpoints.retrieve(endpoint_id)

    if not json:
        # Text mode uses the `print_endpoint` callable read from the context object.
        ctx.obj.print_endpoint(endpoint)
        return

    # Imported under an alias because the `json` parameter shadows the module.
    import json as json_lib

    serialized = json_lib.dumps(endpoint.model_dump(), indent=2, default=datetime_serializer)
    click.echo(serialized)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from together import Together
|
|
4
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@click.command()
@click.argument("endpoint-id", required=True)
@click.option("--wait/--no-wait", default=True, help="Wait for the endpoint to start")
@click.pass_obj
@handle_api_errors("Endpoints")
def start(client: Together, endpoint_id: str, wait: bool) -> None:
    """Start a dedicated inference endpoint."""
    target_state = "STARTED"
    client.endpoints.update(endpoint_id, state=target_state)
    click.echo("Successfully marked endpoint as starting", err=True)

    if wait:
        import time

        click.echo("Waiting for endpoint to start...", err=True)
        # Poll the endpoint, sleeping one second between checks, until it
        # reports the target state. There is no timeout.
        while True:
            current_state = client.endpoints.retrieve(endpoint_id).state
            if current_state == target_state:
                break
            time.sleep(1)
        click.echo("Endpoint started", err=True)

    # Only the endpoint id is written to stdout; status messages use stderr.
    click.echo(endpoint_id)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from together import Together
|
|
4
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@click.command()
@click.argument("endpoint-id", required=True)
@click.option("--wait/--no-wait", default=True, help="Wait for the endpoint to stop")
@click.pass_obj
@handle_api_errors("Endpoints")
def stop(client: Together, endpoint_id: str, wait: bool) -> None:
    """Stop a dedicated inference endpoint."""
    target_state = "STOPPED"
    client.endpoints.update(endpoint_id, state=target_state)
    click.echo("Successfully marked endpoint as stopping", err=True)

    if wait:
        import time

        click.echo("Waiting for endpoint to stop...", err=True)
        # Poll the endpoint, sleeping one second between checks, until it
        # reports the target state. There is no timeout.
        while True:
            current_state = client.endpoints.retrieve(endpoint_id).state
            if current_state == target_state:
                break
            time.sleep(1)
        click.echo("Endpoint stopped", err=True)

    # Only the endpoint id is written to stdout; status messages use stderr.
    click.echo(endpoint_id)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from together import Together
|
|
9
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.argument("endpoint-id", required=True)
@click.option(
    "--display-name",
    help="A new human-readable name for the endpoint",
)
@click.option(
    "--min-replicas",
    type=int,
    help="New minimum number of replicas to maintain",
)
@click.option(
    "--max-replicas",
    type=int,
    help="New maximum number of replicas to scale up to",
)
@click.option(
    "--inactive-timeout",
    type=int,
    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
)
@click.pass_obj
@handle_api_errors("Endpoints")
def update(
    client: Together,
    endpoint_id: str,
    display_name: str | None,
    min_replicas: int | None,
    max_replicas: int | None,
    inactive_timeout: int | None,
) -> None:
    """Update a dedicated inference endpoint's configuration."""
    # Build kwargs for the update. Every option is compared against None so
    # that an explicit 0 (e.g. `--inactive-timeout 0`, which the help text
    # documents as "disable") counts as a supplied value.
    kwargs: Dict[str, Any] = {}
    if display_name is not None:
        kwargs["display_name"] = display_name

    autoscaling: Dict[str, Any] = {}
    if min_replicas is not None:
        autoscaling["min_replicas"] = min_replicas
    if max_replicas is not None:
        autoscaling["max_replicas"] = max_replicas
    if autoscaling:
        kwargs["autoscaling"] = autoscaling

    if inactive_timeout is not None:
        kwargs["inactive_timeout"] = inactive_timeout

    # "Nothing to update" is decided from the kwargs actually built, so this
    # check cannot disagree with what would be sent to the API.
    if not kwargs:
        click.echo("Error: At least one update option must be specified", err=True)
        sys.exit(1)

    client.endpoints.update(endpoint_id, **kwargs)

    # Print what was updated (to stderr), including values that are 0.
    click.echo("Updated endpoint configuration:", err=True)
    if display_name is not None:
        click.echo(f" Display name: {display_name}", err=True)
    if min_replicas is not None:
        click.echo(f" Min replicas: {min_replicas}", err=True)
    if max_replicas is not None:
        click.echo(f" Max replicas: {max_replicas}", err=True)
    if inactive_timeout is not None:
        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)

    click.echo("Successfully updated endpoint", err=True)
    # Only the endpoint id is written to stdout.
    click.echo(endpoint_id)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from .list import list
|
|
4
|
+
from .create import create
|
|
5
|
+
from .status import status
|
|
6
|
+
from .retrieve import retrieve
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@click.group()
@click.pass_context
def evals(ctx: click.Context) -> None:
    """Evals API commands"""
    # Container group only: it does no work itself; subcommands are attached
    # with `evals.add_command(...)`.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Register every subcommand on the `evals` group, in the original order.
for _command in (create, list, retrieve, status):
    evals.add_command(_command)
del _command  # do not leave the loop variable behind as a module attribute
|