together 2.0.0a14__py3-none-any.whl → 2.0.0a16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/_base_client.py +134 -11
- together/_client.py +7 -0
- together/_models.py +16 -1
- together/_types.py +9 -0
- together/_version.py +1 -1
- together/constants.py +34 -0
- together/error.py +15 -0
- together/lib/_google_colab.py +39 -0
- together/lib/cli/__init__.py +73 -0
- together/lib/cli/api/{utils.py → _utils.py} +4 -4
- together/lib/cli/api/beta/clusters/__init__.py +47 -0
- together/lib/cli/api/beta/{clusters.py → clusters/create.py} +5 -179
- together/lib/cli/api/beta/clusters/delete.py +35 -0
- together/lib/cli/api/beta/clusters/get_credentials.py +151 -0
- together/lib/cli/api/beta/clusters/list.py +24 -0
- together/lib/cli/api/beta/clusters/list_regions.py +37 -0
- together/lib/cli/api/beta/clusters/retrieve.py +31 -0
- together/lib/cli/api/beta/clusters/storage/__init__.py +19 -0
- together/lib/cli/api/beta/clusters/storage/create.py +49 -0
- together/lib/cli/api/beta/clusters/storage/delete.py +38 -0
- together/lib/cli/api/beta/clusters/storage/list.py +42 -0
- together/lib/cli/api/beta/clusters/storage/retrieve.py +34 -0
- together/lib/cli/api/beta/clusters/update.py +54 -0
- together/lib/cli/api/endpoints/__init__.py +56 -0
- together/lib/cli/api/endpoints/availability_zones.py +26 -0
- together/lib/cli/api/endpoints/create.py +159 -0
- together/lib/cli/api/endpoints/delete.py +15 -0
- together/lib/cli/api/endpoints/hardware.py +40 -0
- together/lib/cli/api/endpoints/list.py +66 -0
- together/lib/cli/api/endpoints/retrieve.py +23 -0
- together/lib/cli/api/endpoints/start.py +25 -0
- together/lib/cli/api/endpoints/stop.py +25 -0
- together/lib/cli/api/endpoints/update.py +77 -0
- together/lib/cli/api/evals/__init__.py +19 -0
- together/lib/cli/api/{evals.py → evals/create.py} +6 -129
- together/lib/cli/api/evals/list.py +58 -0
- together/lib/cli/api/evals/retrieve.py +21 -0
- together/lib/cli/api/evals/status.py +20 -0
- together/lib/cli/api/files/__init__.py +23 -0
- together/lib/cli/api/files/check.py +21 -0
- together/lib/cli/api/files/delete.py +20 -0
- together/lib/cli/api/files/list.py +34 -0
- together/lib/cli/api/files/retrieve.py +20 -0
- together/lib/cli/api/files/retrieve_content.py +25 -0
- together/lib/cli/api/files/upload.py +38 -0
- together/lib/cli/api/fine_tuning/__init__.py +27 -0
- together/lib/cli/api/fine_tuning/cancel.py +28 -0
- together/lib/cli/api/{fine_tuning.py → fine_tuning/create.py} +5 -257
- together/lib/cli/api/fine_tuning/delete.py +29 -0
- together/lib/cli/api/fine_tuning/download.py +94 -0
- together/lib/cli/api/fine_tuning/list.py +44 -0
- together/lib/cli/api/fine_tuning/list_checkpoints.py +42 -0
- together/lib/cli/api/fine_tuning/list_events.py +35 -0
- together/lib/cli/api/fine_tuning/retrieve.py +27 -0
- together/lib/cli/api/models/__init__.py +15 -0
- together/lib/cli/api/models/list.py +51 -0
- together/lib/cli/api/{models.py → models/upload.py} +4 -51
- together/lib/types/fine_tuning.py +3 -0
- together/resources/beta/clusters/clusters.py +36 -28
- together/resources/beta/clusters/storage.py +30 -21
- together/types/__init__.py +13 -1
- together/types/beta/__init__.py +0 -2
- together/types/beta/cluster_create_params.py +3 -3
- together/types/beta/clusters/__init__.py +0 -1
- together/types/beta/clusters/cluster_storage.py +4 -0
- together/types/chat_completions.py +7 -0
- together/types/endpoints.py +4 -0
- together/types/files.py +8 -0
- together/types/fine_tuning_cancel_response.py +3 -0
- together/types/fine_tuning_list_response.py +3 -0
- together/types/finetune.py +27 -0
- together/types/finetune_response.py +2 -0
- together/types/models.py +2 -0
- {together-2.0.0a14.dist-info → together-2.0.0a16.dist-info}/METADATA +45 -2
- {together-2.0.0a14.dist-info → together-2.0.0a16.dist-info}/RECORD +79 -36
- together-2.0.0a16.dist-info/entry_points.txt +2 -0
- together/lib/cli/api/__init__.py +0 -0
- together/lib/cli/api/beta/clusters_storage.py +0 -152
- together/lib/cli/api/endpoints.py +0 -467
- together/lib/cli/api/files.py +0 -133
- together/lib/cli/cli.py +0 -73
- together/types/beta/cluster_create_response.py +0 -9
- together/types/beta/cluster_update_response.py +0 -9
- together/types/beta/clusters/storage_create_response.py +0 -9
- together-2.0.0a14.dist-info/entry_points.txt +0 -2
- /together/lib/cli/api/beta/{beta.py → __init__.py} +0 -0
- {together-2.0.0a14.dist-info → together-2.0.0a16.dist-info}/WHEEL +0 -0
- {together-2.0.0a14.dist-info → together-2.0.0a16.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,467 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import sys
|
|
4
|
-
import json
|
|
5
|
-
from typing import Any, Dict, Literal, TypeVar, Callable, cast
|
|
6
|
-
from functools import wraps
|
|
7
|
-
|
|
8
|
-
import click
|
|
9
|
-
|
|
10
|
-
from together import Together, omit
|
|
11
|
-
from together.types import DedicatedEndpoint
|
|
12
|
-
from together._exceptions import APIError
|
|
13
|
-
from together.lib.utils.serializer import datetime_serializer
|
|
14
|
-
from together.types.endpoint_list_response import Data as DedicatedEndpointListItem
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def print_endpoint(endpoint: DedicatedEndpoint | DedicatedEndpointListItem) -> None:
    """Write a tab-aligned, human-readable summary of ``endpoint`` to stdout.

    Display name, hardware and autoscaling bounds are only printed when
    ``endpoint`` is a ``DedicatedEndpoint`` instance; the other fields are
    printed for both accepted types.
    """
    # Identity rows come first.
    for prefix, attr in (("ID:\t\t", "id"), ("Name:\t\t", "name")):
        click.echo(f"{prefix}{getattr(endpoint, attr)}")

    # These rows are only emitted for the DedicatedEndpoint model.
    if isinstance(endpoint, DedicatedEndpoint):
        click.echo(f"Display Name:\t{endpoint.display_name}")
        click.echo(f"Hardware:\t{endpoint.hardware}")
        scaling = endpoint.autoscaling
        click.echo(f"Autoscaling:\tMin={scaling.min_replicas}, Max={scaling.max_replicas}")

    trailing_rows = (
        ("Model:\t\t", "model"),
        ("Type:\t\t", "type"),
        ("Owner:\t\t", "owner"),
        ("State:\t\t", "state"),
        ("Created:\t", "created_at"),
    )
    for prefix, attr in trailing_rows:
        click.echo(f"{prefix}{getattr(endpoint, attr)}")
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
F = TypeVar("F", bound=Callable[..., Any])
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def print_api_error(e: APIError) -> None:
    """Print a one-line, user-facing description of an API error to stderr.

    The message is read from ``e.body["error"]["message"]`` when the body has
    that shape. If the body is missing or shaped differently, the function
    falls back to ``str(e)`` instead of raising while reporting the error.

    Args:
        e: The API error to report.
    """
    message: Any = None
    body = e.body
    if isinstance(body, dict):
        error = cast(Dict[str, Any], body).get("error")
        if isinstance(error, dict):
            message = cast(Dict[str, Any], error).get("message")
        elif isinstance(error, str):
            message = error

    # Never fail inside the error reporter: use the exception text when the
    # body did not yield a usable message.
    if not isinstance(message, str) or not message:
        message = str(e)

    lowered = message.lower()
    if "credentials" in lowered or "authentication" in lowered:
        click.echo("Error: Invalid API key or authentication failed", err=True)
    else:
        click.echo(f"Error: {message}", err=True)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def handle_api_errors(f: F) -> F:
    """Wrap a CLI command so failures are reported on stderr with exit code 1.

    ``APIError`` is reported through ``print_api_error``; any other
    ``Exception`` is reported as an unexpected error. In both cases the
    process exits with status 1. On success the wrapped function's return
    value is passed through unchanged.
    """

    @wraps(f)
    def guarded(*args: Any, **kwargs: Any) -> Any:
        try:
            outcome = f(*args, **kwargs)
        except APIError as api_error:
            print_api_error(api_error)
            sys.exit(1)
        except Exception as unexpected:
            click.echo(f"Error: An unexpected error occurred - {unexpected!s}", err=True)
            sys.exit(1)
        else:
            return outcome

    return guarded  # type: ignore
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
# Command group only: the sub-commands are attached below via
# ``@endpoints.command()``; the docstring doubles as the CLI help text.
@click.group()
@click.pass_context
def endpoints(ctx: click.Context) -> None:
    """Endpoints API commands"""
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
@endpoints.command()
@click.option(
    "--model",
    required=True,
    help="The model to deploy (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct)",
)
@click.option(
    "--min-replicas",
    type=int,
    default=1,
    help="Minimum number of replicas to deploy",
)
@click.option(
    "--max-replicas",
    type=int,
    default=1,
    help="Maximum number of replicas to deploy",
)
@click.option(
    "--gpu",
    type=click.Choice(["h100", "a100", "l40", "l40s", "rtx-6000"]),
    required=True,
    help="GPU type to use for inference",
)
@click.option(
    "--gpu-count",
    type=int,
    default=1,
    help="Number of GPUs to use per replica",
)
@click.option(
    "--display-name",
    help="A human-readable name for the endpoint",
)
@click.option(
    "--no-prompt-cache",
    is_flag=True,
    help="Disable the prompt cache for this endpoint",
)
@click.option(
    "--no-speculative-decoding",
    is_flag=True,
    help="Disable speculative decoding for this endpoint",
)
@click.option(
    "--no-auto-start",
    is_flag=True,
    help="Create the endpoint in STOPPED state instead of auto-starting it",
)
@click.option(
    "--inactive-timeout",
    type=int,
    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
)
@click.option(
    "--availability-zone",
    help="Start endpoint in specified availability zone (e.g., us-central-4b)",
)
@click.option(
    "--wait/--no-wait",
    default=True,
    help="Wait for the endpoint to be ready after creation",
)
@click.pass_obj
@handle_api_errors
def create(
    client: Together,
    model: str,
    min_replicas: int,
    max_replicas: int,
    gpu: str,
    gpu_count: int,
    display_name: str | None,
    no_prompt_cache: bool | None,
    no_speculative_decoding: bool | None,
    no_auto_start: bool,
    inactive_timeout: int | None,
    availability_zone: str | None,
    wait: bool,
) -> None:
    """Create a new dedicated inference endpoint."""
    # Map GPU types to their full hardware ID names. A single A100 maps to the
    # PCIe variant, multi-GPU A100 configurations to the SXM variant.
    gpu_map = {
        "h100": "nvidia_h100_80gb_sxm",
        "a100": "nvidia_a100_80gb_pcie" if gpu_count == 1 else "nvidia_a100_80gb_sxm",
        "l40": "nvidia_l40",
        "l40s": "nvidia_l40s",
        "rtx-6000": "nvidia_rtx_6000_ada",
    }

    hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"

    try:
        response = client.endpoints.create(
            model=model,
            hardware=hardware_id,
            autoscaling={
                "min_replicas": min_replicas,
                "max_replicas": max_replicas,
            },
            display_name=display_name or omit,
            disable_prompt_cache=no_prompt_cache or omit,
            disable_speculative_decoding=no_speculative_decoding or omit,
            state="STOPPED" if no_auto_start else "STARTED",
            inactive_timeout=inactive_timeout,
            extra_query={"availability_zone": availability_zone or omit},
        )
    except APIError as e:
        print_api_error(e)
        # On a hardware-related rejection, list what is actually available.
        reason = str(e).lower()
        if "check the hardware api" in reason or "invalid hardware provided" in reason:
            fetch_and_print_hardware_options(client=client, model=model, print_json=False, available=True)

        sys.exit(1)

    # Print detailed information to stderr
    click.echo("Created dedicated endpoint with:", err=True)
    click.echo(f" Model: {model}", err=True)
    click.echo(f" Min replicas: {min_replicas}", err=True)
    click.echo(f" Max replicas: {max_replicas}", err=True)
    click.echo(f" Hardware: {hardware_id}", err=True)
    if display_name:
        click.echo(f" Display name: {display_name}", err=True)
    if no_prompt_cache:
        click.echo(" Prompt cache: disabled", err=True)
    if no_speculative_decoding:
        click.echo(" Speculative decoding: disabled", err=True)
    if no_auto_start:
        click.echo(" Auto-start: disabled", err=True)
    if inactive_timeout is not None:
        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
    if availability_zone:
        click.echo(f" Availability zone: {availability_zone}", err=True)

    click.echo(f"Endpoint created successfully, id: {response.id}", err=True)

    # An endpoint created with --no-auto-start is requested in STOPPED state,
    # so polling for STARTED would never finish; skip the wait in that case.
    if wait and not no_auto_start:
        import time

        click.echo("Waiting for endpoint to be ready...", err=True)
        # NOTE(review): there is no upper bound on this poll loop.
        while client.endpoints.retrieve(response.id).state != "STARTED":
            time.sleep(1)
        click.echo("Endpoint ready", err=True)

    # Print only the endpoint ID to stdout
    click.echo(response.id)
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.pass_obj
@handle_api_errors
def get(client: Together, endpoint_id: str, json: bool) -> None:
    """Get a dedicated inference endpoint."""
    endpoint = client.endpoints.retrieve(endpoint_id)

    if not json:
        print_endpoint(endpoint)
        return

    # The ``json`` flag parameter hides the module of the same name, so the
    # module is imported under an alias.
    import json as json_lib

    rendered = json_lib.dumps(endpoint.model_dump(), indent=2, default=datetime_serializer)
    click.echo(rendered)
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
@endpoints.command()
@click.option("--model", help="Filter hardware options by model")
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.option(
    "--available",
    is_flag=True,
    help="Print only available hardware options (can only be used if model is passed in)",
)
@click.pass_obj
@handle_api_errors
def hardware(client: Together, model: str | None, json: bool, available: bool) -> None:
    """List all available hardware options, optionally filtered by model."""
    # Thin CLI wrapper: the listing and printing live in the shared helper.
    fetch_and_print_hardware_options(
        client=client,
        model=model,
        print_json=json,
        available=available,
    )
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def fetch_and_print_hardware_options(client: Together, model: str | None, print_json: bool, available: bool) -> None:
    """List hardware options via ``client.hardware.list`` and print them.

    A heading is always written to stderr. With ``available`` set, only
    entries whose availability status is ``"available"`` are kept. JSON output
    goes to stdout; the plain listing of hardware ids goes to stderr.
    """
    heading = "All hardware options:"
    if available:
        heading = "Available hardware options:"
    click.echo(heading, err=True)

    listing = client.hardware.list(model=model or omit)
    if available:
        # Entries without availability information are dropped as well.
        listing.data = [
            option
            for option in listing.data
            if option.availability is not None and option.availability.status == "available"
        ]

    if not print_json:
        for option in listing.data:
            click.echo(f" {option.id}", err=True)
        return

    dumped = [option.model_dump() for option in listing.data]
    click.echo(json.dumps(dumped, default=datetime_serializer, indent=2))
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.option("--wait/--no-wait", default=True, help="Wait for the endpoint to stop")
@click.pass_obj
@handle_api_errors
def stop(client: Together, endpoint_id: str, wait: bool) -> None:
    """Stop a dedicated inference endpoint."""
    client.endpoints.update(endpoint_id, state="STOPPED")
    click.echo("Successfully marked endpoint as stopping", err=True)

    if wait:
        from time import sleep

        click.echo("Waiting for endpoint to stop...", err=True)
        # Poll once per second until the endpoint reports STOPPED.
        while True:
            if client.endpoints.retrieve(endpoint_id).state == "STOPPED":
                break
            sleep(1)
        click.echo("Endpoint stopped", err=True)

    # Progress messages go to stderr; only the id is written to stdout.
    click.echo(endpoint_id)
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.option("--wait/--no-wait", default=True, help="Wait for the endpoint to start")
@click.pass_obj
@handle_api_errors
def start(client: Together, endpoint_id: str, wait: bool) -> None:
    """Start a dedicated inference endpoint."""
    client.endpoints.update(endpoint_id, state="STARTED")
    click.echo("Successfully marked endpoint as starting", err=True)

    if wait:
        from time import sleep

        click.echo("Waiting for endpoint to start...", err=True)
        # Poll once per second until the endpoint reports STARTED.
        while True:
            if client.endpoints.retrieve(endpoint_id).state == "STARTED":
                break
            sleep(1)
        click.echo("Endpoint started", err=True)

    # Progress messages go to stderr; only the id is written to stdout.
    click.echo(endpoint_id)
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.pass_obj
@handle_api_errors
def delete(client: Together, endpoint_id: str) -> None:
    """Delete a dedicated inference endpoint."""
    client.endpoints.delete(endpoint_id)

    # Confirmation goes to stderr; the id alone is written to stdout.
    confirmation = "Successfully deleted endpoint"
    click.echo(confirmation, err=True)
    click.echo(endpoint_id)
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
@endpoints.command()
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.option(
    "--type",
    type=click.Choice(["dedicated", "serverless"]),
    help="Filter by endpoint type",
)
@click.option(
    "--mine",
    type=click.BOOL,
    default=None,
    help="true (only mine), default=all",
)
@click.option(
    "--usage-type",
    type=click.Choice(["on-demand", "reserved"]),
    help="Filter by endpoint usage type",
)
@click.pass_obj
@handle_api_errors
def list(
    client: Together,
    json: bool,
    type: Literal["dedicated", "serverless"] | None,
    usage_type: Literal["on-demand", "reserved"] | None,
    mine: bool | None,
) -> None:
    """List all inference endpoints (includes both dedicated and serverless endpoints)."""
    # NOTE: the function and parameter names shadow builtins because click
    # derives the command and option names from them.
    endpoints = client.endpoints.list(
        type=type or omit,
        usage_type=usage_type or omit,
        mine=mine if mine is not None else omit,
    )
    items = endpoints.data

    if json:
        import json as json_lib

        # JSON mode always emits a document (``[]`` when nothing matched) so
        # that consumers of stdout never receive empty output.
        click.echo("Endpoints:", err=True)
        click.echo(
            json_lib.dumps([endpoint.model_dump() for endpoint in items], default=datetime_serializer, indent=2)
        )
        return

    # Test the returned items, not the response object, for emptiness.
    if not items:
        click.echo("No endpoints found", err=True)
        return

    click.echo("Endpoints:", err=True)
    for endpoint in items:
        print_endpoint(endpoint)
        click.echo()
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.option(
    "--display-name",
    help="A new human-readable name for the endpoint",
)
@click.option(
    "--min-replicas",
    type=int,
    help="New minimum number of replicas to maintain",
)
@click.option(
    "--max-replicas",
    type=int,
    help="New maximum number of replicas to scale up to",
)
@click.option(
    "--inactive-timeout",
    type=int,
    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
)
@click.pass_obj
@handle_api_errors
def update(
    client: Together,
    endpoint_id: str,
    display_name: str | None,
    min_replicas: int | None,
    max_replicas: int | None,
    inactive_timeout: int | None,
) -> None:
    """Update a dedicated inference endpoint's configuration."""
    # Compare against None rather than truthiness: 0 is a meaningful value
    # (``--inactive-timeout 0`` disables the timeout) and must count as set.
    if all(value is None for value in (display_name, min_replicas, max_replicas, inactive_timeout)):
        click.echo("Error: At least one update option must be specified", err=True)
        sys.exit(1)

    # Build kwargs for the update, including only the options that were given.
    kwargs: Dict[str, Any] = {}
    if display_name is not None:
        kwargs["display_name"] = display_name

    if min_replicas is not None or max_replicas is not None:
        kwargs["autoscaling"] = {}
        if min_replicas is not None:
            kwargs["autoscaling"]["min_replicas"] = min_replicas
        if max_replicas is not None:
            kwargs["autoscaling"]["max_replicas"] = max_replicas

    if inactive_timeout is not None:
        kwargs["inactive_timeout"] = inactive_timeout

    client.endpoints.update(endpoint_id, **kwargs)

    # Print what was updated; the summary mirrors exactly what was sent.
    click.echo("Updated endpoint configuration:", err=True)
    if display_name is not None:
        click.echo(f" Display name: {display_name}", err=True)
    if min_replicas is not None:
        click.echo(f" Min replicas: {min_replicas}", err=True)
    if max_replicas is not None:
        click.echo(f" Max replicas: {max_replicas}", err=True)
    if inactive_timeout is not None:
        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)

    click.echo("Successfully updated endpoint", err=True)
    click.echo(endpoint_id)
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
@endpoints.command()
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.pass_obj
@handle_api_errors
def availability_zones(client: Together, json: bool) -> None:
    """List all availability zones."""
    avzones = client.endpoints.list_avzones()

    if json:
        import json as json_lib

        # JSON mode always emits the response document, even when it is empty.
        click.echo(json_lib.dumps(avzones.model_dump(), indent=2))
        return

    # Test the zone list, not the response object, for emptiness.
    if not avzones.avzones:
        click.echo("No availability zones found", err=True)
        return

    click.echo("Available zones:", err=True)
    for availability_zone in sorted(avzones.avzones):
        click.echo(f" {availability_zone}")
|
together/lib/cli/api/files.py
DELETED
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import pathlib
|
|
3
|
-
from typing import Any, Dict, List, get_args
|
|
4
|
-
from textwrap import wrap
|
|
5
|
-
|
|
6
|
-
import click
|
|
7
|
-
from tabulate import tabulate
|
|
8
|
-
|
|
9
|
-
from together import Together
|
|
10
|
-
from together.types import FilePurpose
|
|
11
|
-
|
|
12
|
-
# from together.utils import check_file, convert_bytes, convert_unix_timestamp
|
|
13
|
-
from ...utils import check_file, convert_bytes, convert_unix_timestamp
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
# Command group only: the sub-commands are attached below via
# ``@files.command()``; the docstring doubles as the CLI help text.
@click.group()
@click.pass_context
def files(ctx: click.Context) -> None:
    """File API commands"""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@files.command()
@click.pass_context
@click.argument(
    "file",
    type=click.Path(exists=True, file_okay=True, resolve_path=True, readable=True, dir_okay=False),
    required=True,
)
@click.option(
    "--purpose",
    type=click.Choice(get_args(FilePurpose)),
    default="fine-tune",
    help="Purpose of file upload. Acceptable values in enum `together.types.FilePurpose`. Defaults to `fine-tune`.",
)
@click.option(
    "--check/--no-check",
    default=True,
    help="Whether to check the file before uploading.",
)
def upload(ctx: click.Context, file: pathlib.Path, purpose: FilePurpose, check: bool) -> None:
    """Upload file"""
    # The root command stores the configured API client on the click context.
    client: Together = ctx.obj

    response = client.files.upload(file=file, purpose=purpose, check=check)

    # Print the response as JSON, leaving out fields that are None.
    click.echo(json.dumps(response.model_dump(exclude_none=True), indent=4))
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
@files.command()
@click.pass_context
def list(ctx: click.Context) -> None:
    """List files"""
    client: Together = ctx.obj
    response = client.files.list()

    # One table row per file; long file names are wrapped at 30 characters.
    rows: List[Dict[str, Any]] = [
        {
            "File name": "\n".join(wrap(entry.filename or "", width=30)),
            "File ID": entry.id,
            "Size": convert_bytes(float(str(entry.bytes))),  # convert to string for mypy typing
            "Created At": convert_unix_timestamp(entry.created_at or 0),
            "Line Count": entry.line_count,
        }
        for entry in (response.data or [])
    ]

    click.echo(tabulate(rows, headers="keys", tablefmt="grid", showindex=True))
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
@files.command()
@click.pass_context
@click.argument("id", type=str, required=True)
def retrieve(ctx: click.Context, id: str) -> None:
    """Retrieve file metadata"""
    # NOTE: the docstring is shown as this command's help text, so it must
    # describe retrieval rather than upload.
    client: Together = ctx.obj

    response = client.files.retrieve(id=id)

    # Print the response as JSON, leaving out fields that are None.
    click.echo(json.dumps(response.model_dump(exclude_none=True), indent=4))
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
@files.command()
@click.pass_context
@click.argument("id", type=str, required=True)
@click.option("--output", type=str, default=None, help="Output filename")
def retrieve_content(ctx: click.Context, id: str, output: str) -> None:
    """Retrieve file content and output to file"""
    client: Together = ctx.obj
    content = client.files.content(id=id)

    # Without --output, the content is decoded as UTF-8 and echoed to stdout.
    if not output:
        click.echo(content.read().decode("utf-8"))
        return

    # With --output, the raw bytes are written to the named file.
    with open(output, "wb") as sink:
        sink.write(content.read())
    click.echo(f"File saved to {output}")
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
@files.command()
@click.pass_context
@click.argument("id", type=str, required=True)
def delete(ctx: click.Context, id: str) -> None:
    """Delete remote file"""
    client: Together = ctx.obj
    deletion = client.files.delete(id=id)

    # Print the response as JSON, leaving out fields that are None.
    rendered = json.dumps(deletion.model_dump(exclude_none=True), indent=4)
    click.echo(rendered)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
@files.command()
@click.pass_context
@click.argument(
    "file",
    type=click.Path(exists=True, file_okay=True, resolve_path=True, readable=True, dir_okay=False),
    required=True,
)
def check(_ctx: click.Context, file: pathlib.Path) -> None:
    """Check file for issues"""
    # The click context is accepted but not used; the check runs through the
    # local ``check_file`` helper and its report is printed as JSON.
    click.echo(json.dumps(check_file(file), indent=4))
|
together/lib/cli/cli.py
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
import click
|
|
7
|
-
|
|
8
|
-
import together
|
|
9
|
-
from together._version import __version__
|
|
10
|
-
from together._constants import DEFAULT_TIMEOUT
|
|
11
|
-
from together.lib.cli.api.evals import evals
|
|
12
|
-
from together.lib.cli.api.files import files
|
|
13
|
-
from together.lib.cli.api.models import models
|
|
14
|
-
from together.lib.cli.api.beta.beta import beta
|
|
15
|
-
from together.lib.cli.api.endpoints import endpoints
|
|
16
|
-
from together.lib.cli.api.fine_tuning import fine_tuning
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def print_version(ctx: click.Context, _params: Any, value: Any) -> None:
    """Eager ``--version`` callback: print the package version and exit.

    Nothing happens when the flag was not passed or while click is doing
    resilient parsing.
    """
    if value and not ctx.resilient_parsing:
        click.echo(f"Version {__version__}")
        ctx.exit()
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@click.group()
@click.pass_context
@click.option(
    "--api-key",
    type=str,
    help="API Key. Defaults to environment variable `TOGETHER_API_KEY`",
    default=os.getenv("TOGETHER_API_KEY"),
)
@click.option("--base-url", type=str, help="API Base URL. Defaults to Together AI endpoint.")
@click.option("--timeout", type=int, help=f"Request timeout. Defaults to {DEFAULT_TIMEOUT} seconds")
@click.option(
    "--max-retries",
    type=int,
    help="Maximum number of HTTP retries.",
)
@click.option(
    "--version",
    is_flag=True,
    callback=print_version,
    expose_value=False,
    is_eager=True,
    help="Print version",
)
@click.option("--debug", help="Debug mode", is_flag=True)
def main(
    ctx: click.Context,
    api_key: str | None,
    base_url: str | None,
    timeout: int | None,
    debug: bool | None,
    max_retries: int | None,
) -> None:
    """This is a sample CLI tool."""
    # Respect an already-set TOGETHER_LOG; otherwise derive it from --debug.
    os.environ.setdefault("TOGETHER_LOG", "debug" if debug else "info")

    # An explicit ``timeout=None`` is understood to disable timeouts in the
    # client, so fall back to DEFAULT_TIMEOUT when --timeout is not given, as
    # the option's help text promises.
    # NOTE(review): confirm against the client constructor's timeout handling.
    ctx.obj = together.Together(
        api_key=api_key,
        base_url=base_url,
        timeout=timeout if timeout is not None else DEFAULT_TIMEOUT,
        max_retries=max_retries if max_retries is not None else 0,
    )
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
# Attach every API command group to the root command, in the original order.
for _command in (files, fine_tuning, models, endpoints, evals, beta):
    main.add_command(_command)

if __name__ == "__main__":
    main()
|