together 1.3.14__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- together/abstract/api_requestor.py +7 -9
- together/cli/api/endpoints.py +415 -0
- together/cli/cli.py +2 -0
- together/client.py +1 -0
- together/error.py +3 -0
- together/resources/__init__.py +4 -1
- together/resources/endpoints.py +488 -0
- together/types/__init__.py +19 -20
- together/types/endpoints.py +123 -0
- {together-1.3.14.dist-info → together-1.4.1.dist-info}/METADATA +3 -4
- {together-1.3.14.dist-info → together-1.4.1.dist-info}/RECORD +14 -11
- {together-1.3.14.dist-info → together-1.4.1.dist-info}/LICENSE +0 -0
- {together-1.3.14.dist-info → together-1.4.1.dist-info}/WHEEL +0 -0
- {together-1.3.14.dist-info → together-1.4.1.dist-info}/entry_points.txt +0 -0
together/abstract/api_requestor.py
CHANGED

@@ -437,7 +437,7 @@ class APIRequestor:
                     [(k, v) for k, v in options.params.items() if v is not None]
                 )
                 abs_url = _build_api_url(abs_url, encoded_params)
-        elif options.method.lower() in {"post", "put"}:
+        elif options.method.lower() in {"post", "put", "patch"}:
             if options.params and (options.files or options.override_headers):
                 data = options.params
             elif options.params and not options.files:
@@ -587,16 +587,14 @@ class APIRequestor:
             )
             headers["Content-Type"] = content_type

-        request_kwargs = {
-            "headers": headers,
-            "data": data,
-            "timeout": timeout,
-            "allow_redirects": options.allow_redirects,
-        }
-
         try:
             result = await session.request(
-                method=options.method, url=abs_url, **request_kwargs
+                method=options.method,
+                url=abs_url,
+                headers=headers,
+                data=data,
+                timeout=timeout,
+                allow_redirects=options.allow_redirects,
             )
             utils.log_debug(
                 "Together API response",
together/cli/api/endpoints.py
ADDED

@@ -0,0 +1,415 @@
+from __future__ import annotations
+
+import json
+import sys
+from functools import wraps
+from typing import Any, Callable, Dict, List, Literal, TypeVar, Union
+
+import click
+
+from together import Together
+from together.error import InvalidRequestError
+from together.types import DedicatedEndpoint, ListEndpoint
+
+
+def print_endpoint(
+    endpoint: Union[DedicatedEndpoint, ListEndpoint],
+) -> None:
+    """Print endpoint details in a Docker-like format or JSON."""
+
+    # Print header info
+    click.echo(f"ID:\t\t{endpoint.id}")
+    click.echo(f"Name:\t\t{endpoint.name}")
+
+    # Print type-specific fields
+    if isinstance(endpoint, DedicatedEndpoint):
+        click.echo(f"Display Name:\t{endpoint.display_name}")
+        click.echo(f"Hardware:\t{endpoint.hardware}")
+        click.echo(
+            f"Autoscaling:\tMin={endpoint.autoscaling.min_replicas}, "
+            f"Max={endpoint.autoscaling.max_replicas}"
+        )
+
+    click.echo(f"Model:\t\t{endpoint.model}")
+    click.echo(f"Type:\t\t{endpoint.type}")
+    click.echo(f"Owner:\t\t{endpoint.owner}")
+    click.echo(f"State:\t\t{endpoint.state}")
+    click.echo(f"Created:\t{endpoint.created_at}")
+
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def print_api_error(
+    e: InvalidRequestError,
+) -> None:
+    error_details = e.api_response.message
+
+    if error_details and (
+        "credentials" in error_details.lower()
+        or "authentication" in error_details.lower()
+    ):
+        click.echo("Error: Invalid API key or authentication failed", err=True)
+    else:
+        click.echo(f"Error: {error_details}", err=True)
+
+
+def handle_api_errors(f: F) -> F:
+    """Decorator to handle common API errors in CLI commands."""
+
+    @wraps(f)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        try:
+            return f(*args, **kwargs)
+        except InvalidRequestError as e:
+            print_api_error(e)
+            sys.exit(1)
+        except Exception as e:
+            click.echo(f"Error: An unexpected error occurred - {str(e)}", err=True)
+            sys.exit(1)
+
+    return wrapper  # type: ignore
+
+
+@click.group()
+@click.pass_context
+def endpoints(ctx: click.Context) -> None:
+    """Endpoints API commands"""
+    pass
+
+
+@endpoints.command()
+@click.option(
+    "--model",
+    required=True,
+    help="The model to deploy (e.g. mistralai/Mixtral-8x7B-Instruct-v0.1)",
+)
+@click.option(
+    "--min-replicas",
+    type=int,
+    default=1,
+    help="Minimum number of replicas to deploy",
+)
+@click.option(
+    "--max-replicas",
+    type=int,
+    default=1,
+    help="Maximum number of replicas to deploy",
+)
+@click.option(
+    "--gpu",
+    type=click.Choice(["h100", "a100", "l40", "l40s", "rtx-6000"]),
+    required=True,
+    help="GPU type to use for inference",
+)
+@click.option(
+    "--gpu-count",
+    type=int,
+    default=1,
+    help="Number of GPUs to use per replica",
+)
+@click.option(
+    "--display-name",
+    help="A human-readable name for the endpoint",
+)
+@click.option(
+    "--no-prompt-cache",
+    is_flag=True,
+    help="Disable the prompt cache for this endpoint",
+)
+@click.option(
+    "--no-speculative-decoding",
+    is_flag=True,
+    help="Disable speculative decoding for this endpoint",
+)
+@click.option(
+    "--no-auto-start",
+    is_flag=True,
+    help="Create the endpoint in STOPPED state instead of auto-starting it",
+)
+@click.option(
+    "--wait",
+    is_flag=True,
+    default=True,
+    help="Wait for the endpoint to be ready after creation",
+)
+@click.pass_obj
+@handle_api_errors
+def create(
+    client: Together,
+    model: str,
+    min_replicas: int,
+    max_replicas: int,
+    gpu: str,
+    gpu_count: int,
+    display_name: str | None,
+    no_prompt_cache: bool,
+    no_speculative_decoding: bool,
+    no_auto_start: bool,
+    wait: bool,
+) -> None:
+    """Create a new dedicated inference endpoint."""
+    # Map GPU types to their full hardware ID names
+    gpu_map = {
+        "h100": "nvidia_h100_80gb_sxm",
+        "a100": "nvidia_a100_80gb_pcie" if gpu_count == 1 else "nvidia_a100_80gb_sxm",
+        "l40": "nvidia_l40",
+        "l40s": "nvidia_l40s",
+        "rtx-6000": "nvidia_rtx_6000_ada",
+    }
+
+    hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"
+
+    try:
+        response = client.endpoints.create(
+            model=model,
+            hardware=hardware_id,
+            min_replicas=min_replicas,
+            max_replicas=max_replicas,
+            display_name=display_name,
+            disable_prompt_cache=no_prompt_cache,
+            disable_speculative_decoding=no_speculative_decoding,
+            state="STOPPED" if no_auto_start else "STARTED",
+        )
+    except InvalidRequestError as e:
+        print_api_error(e)
+        if "check the hardware api" in str(e).lower():
+            fetch_and_print_hardware_options(
+                client=client, model=model, print_json=False, available=True
+            )
+
+        sys.exit(1)
+
+    # Print detailed information to stderr
+    click.echo("Created dedicated endpoint with:", err=True)
+    click.echo(f"  Model: {model}", err=True)
+    click.echo(f"  Min replicas: {min_replicas}", err=True)
+    click.echo(f"  Max replicas: {max_replicas}", err=True)
+    click.echo(f"  Hardware: {hardware_id}", err=True)
+    if display_name:
+        click.echo(f"  Display name: {display_name}", err=True)
+    if no_prompt_cache:
+        click.echo("  Prompt cache: disabled", err=True)
+    if no_speculative_decoding:
+        click.echo("  Speculative decoding: disabled", err=True)
+    if no_auto_start:
+        click.echo("  Auto-start: disabled", err=True)
+
+    click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to be ready...", err=True)
+        while client.endpoints.get(response.id).state != "STARTED":
+            time.sleep(1)
+        click.echo("Endpoint ready", err=True)
+
+    # Print only the endpoint ID to stdout
+    click.echo(response.id)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.pass_obj
+@handle_api_errors
+def get(client: Together, endpoint_id: str, json: bool) -> None:
+    """Get a dedicated inference endpoint."""
+    endpoint = client.endpoints.get(endpoint_id)
+    if json:
+        import json as json_lib
+
+        click.echo(json_lib.dumps(endpoint.model_dump(), indent=2))
+    else:
+        print_endpoint(endpoint)
+
+
+@endpoints.command()
+@click.option("--model", help="Filter hardware options by model")
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.option(
+    "--available",
+    is_flag=True,
+    help="Print only available hardware options (can only be used if model is passed in)",
+)
+@click.pass_obj
+@handle_api_errors
+def hardware(client: Together, model: str | None, json: bool, available: bool) -> None:
+    """List all available hardware options, optionally filtered by model."""
+    fetch_and_print_hardware_options(client, model, json, available)
+
+
+def fetch_and_print_hardware_options(
+    client: Together, model: str | None, print_json: bool, available: bool
+) -> None:
+    """Print hardware options for a model."""
+
+    message = "Available hardware options:" if available else "All hardware options:"
+    click.echo(message, err=True)
+    hardware_options = client.endpoints.list_hardware(model)
+    if available:
+        hardware_options = [
+            hardware
+            for hardware in hardware_options
+            if hardware.availability is not None
+            and hardware.availability.status == "available"
+        ]
+
+    if print_json:
+        json_output = [hardware.model_dump() for hardware in hardware_options]
+        click.echo(json.dumps(json_output, indent=2))
+    else:
+        for hardware in hardware_options:
+            click.echo(f"  {hardware.id}", err=True)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option(
+    "--wait", is_flag=True, default=True, help="Wait for the endpoint to stop"
+)
+@click.pass_obj
+@handle_api_errors
+def stop(client: Together, endpoint_id: str, wait: bool) -> None:
+    """Stop a dedicated inference endpoint."""
+    client.endpoints.update(endpoint_id, state="STOPPED")
+    click.echo("Successfully marked endpoint as stopping", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to stop...", err=True)
+        while client.endpoints.get(endpoint_id).state != "STOPPED":
+            time.sleep(1)
+        click.echo("Endpoint stopped", err=True)
+
+    click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option(
+    "--wait", is_flag=True, default=True, help="Wait for the endpoint to start"
+)
+@click.pass_obj
+@handle_api_errors
+def start(client: Together, endpoint_id: str, wait: bool) -> None:
+    """Start a dedicated inference endpoint."""
+    client.endpoints.update(endpoint_id, state="STARTED")
+    click.echo("Successfully marked endpoint as starting", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to start...", err=True)
+        while client.endpoints.get(endpoint_id).state != "STARTED":
+            time.sleep(1)
+        click.echo("Endpoint started", err=True)
+
+    click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.pass_obj
+@handle_api_errors
+def delete(client: Together, endpoint_id: str) -> None:
+    """Delete a dedicated inference endpoint."""
+    client.endpoints.delete(endpoint_id)
+    click.echo("Successfully deleted endpoint", err=True)
+    click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.option(
+    "--type",
+    type=click.Choice(["dedicated", "serverless"]),
+    help="Filter by endpoint type",
+)
+@click.pass_obj
+@handle_api_errors
+def list(
+    client: Together, json: bool, type: Literal["dedicated", "serverless"] | None
+) -> None:
+    """List all inference endpoints (includes both dedicated and serverless endpoints)."""
+    endpoints: List[ListEndpoint] = client.endpoints.list(type=type)
+
+    if not endpoints:
+        click.echo("No dedicated endpoints found", err=True)
+        return
+
+    click.echo("Endpoints:", err=True)
+    if json:
+        import json as json_lib
+
+        click.echo(
+            json_lib.dumps([endpoint.model_dump() for endpoint in endpoints], indent=2)
+        )
+    else:
+        for endpoint in endpoints:
+            print_endpoint(
+                endpoint,
+            )
+            click.echo()
+
+
+@endpoints.command()
+@click.argument("endpoint-id", required=True)
+@click.option(
+    "--display-name",
+    help="A new human-readable name for the endpoint",
+)
+@click.option(
+    "--min-replicas",
+    type=int,
+    help="New minimum number of replicas to maintain",
+)
+@click.option(
+    "--max-replicas",
+    type=int,
+    help="New maximum number of replicas to scale up to",
+)
+@click.pass_obj
+@handle_api_errors
+def update(
+    client: Together,
+    endpoint_id: str,
+    display_name: str | None,
+    min_replicas: int | None,
+    max_replicas: int | None,
+) -> None:
+    """Update a dedicated inference endpoint's configuration."""
+    if not any([display_name, min_replicas, max_replicas]):
+        click.echo("Error: At least one update option must be specified", err=True)
+        sys.exit(1)
+
+    # If only one of min/max replicas is specified, we need both for the update
+    if (min_replicas is None) != (max_replicas is None):
+        click.echo(
+            "Error: Both --min-replicas and --max-replicas must be specified together",
+            err=True,
+        )
+        sys.exit(1)
+
+    # Build kwargs for the update
+    kwargs: Dict[str, Any] = {}
+    if display_name is not None:
+        kwargs["display_name"] = display_name
+    if min_replicas is not None and max_replicas is not None:
+        kwargs["min_replicas"] = min_replicas
+        kwargs["max_replicas"] = max_replicas
+
+    _response = client.endpoints.update(endpoint_id, **kwargs)
+
+    # Print what was updated
+    click.echo("Updated endpoint configuration:", err=True)
+    if display_name:
+        click.echo(f"  Display name: {display_name}", err=True)
+    if min_replicas is not None and max_replicas is not None:
+        click.echo(f"  Min replicas: {min_replicas}", err=True)
+        click.echo(f"  Max replicas: {max_replicas}", err=True)
+
+    click.echo("Successfully updated endpoint", err=True)
+    click.echo(endpoint_id)
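
The new `endpoints` group exposes `create`, `get`, `list`, `update`, `start`, `stop`, `delete`, and `hardware` commands. A minimal sketch of driving the group in-process with click's test runner (the model name is illustrative, and a valid `TOGETHER_API_KEY` is assumed to be set in the environment):

from click.testing import CliRunner

from together import Together
from together.cli.api.endpoints import endpoints

runner = CliRunner()
client = Together()  # picks up TOGETHER_API_KEY from the environment

# List hardware options compatible with a model, as JSON on stdout.
result = runner.invoke(
    endpoints,
    ["hardware", "--model", "mistralai/Mixtral-8x7B-Instruct-v0.1", "--json"],
    obj=client,  # @click.pass_obj hands this client to the command
)
print(result.output)
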
together/cli/cli.py
CHANGED
@@ -8,6 +8,7 @@ import click
 import together
 from together.cli.api.chat import chat, interactive
 from together.cli.api.completions import completions
+from together.cli.api.endpoints import endpoints
 from together.cli.api.files import files
 from together.cli.api.finetune import fine_tuning
 from together.cli.api.images import images
@@ -72,6 +73,7 @@ main.add_command(images)
 main.add_command(files)
 main.add_command(fine_tuning)
 main.add_command(models)
+main.add_command(endpoints)

 if __name__ == "__main__":
     main()
together/client.py
CHANGED
together/error.py
CHANGED
@@ -18,6 +18,9 @@ class TogetherException(Exception):
         request_id: str | None = None,
         http_status: int | None = None,
     ) -> None:
+        if isinstance(message, TogetherErrorResponse):
+            self.api_response = message
+
         _message = (
             json.dumps(message.model_dump(exclude_none=True))
             if isinstance(message, TogetherErrorResponse)
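
The stored `api_response` is what the new CLI's `print_api_error` reads. A minimal sketch of the same pattern in user code (the endpoint ID is illustrative, and `api_response` is only set when the API returned a structured `TogetherErrorResponse`):

from together import Together
from together.error import InvalidRequestError

client = Together()

try:
    client.endpoints.get("endpoint-id-that-does-not-exist")
except InvalidRequestError as e:
    # The parsed error body, preserved by the change above.
    print(e.api_response.message)
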
together/resources/__init__.py
CHANGED
@@ -1,12 +1,13 @@
+from together.resources.audio import AsyncAudio, Audio
 from together.resources.chat import AsyncChat, Chat
 from together.resources.completions import AsyncCompletions, Completions
 from together.resources.embeddings import AsyncEmbeddings, Embeddings
+from together.resources.endpoints import AsyncEndpoints, Endpoints
 from together.resources.files import AsyncFiles, Files
 from together.resources.finetune import AsyncFineTuning, FineTuning
 from together.resources.images import AsyncImages, Images
 from together.resources.models import AsyncModels, Models
 from together.resources.rerank import AsyncRerank, Rerank
-from together.resources.audio import AsyncAudio, Audio


 __all__ = [
@@ -28,4 +29,6 @@ __all__ = [
     "Rerank",
     "AsyncAudio",
     "Audio",
+    "AsyncEndpoints",
+    "Endpoints",
 ]
together/resources/endpoints.py
ADDED

@@ -0,0 +1,488 @@
+from __future__ import annotations
+
+from typing import Dict, List, Literal, Optional, Union
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import TogetherClient, TogetherRequest
+from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
+
+
+class Endpoints:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def list(
+        self, type: Optional[Literal["dedicated", "serverless"]] = None
+    ) -> List[ListEndpoint]:
+        """
+        List all endpoints, can be filtered by type.
+
+        Args:
+            type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
+
+        Returns:
+            List[ListEndpoint]: List of endpoint objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if type is not None:
+            params["type"] = type
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="endpoints",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        response.data = response.data["data"]
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        return [ListEndpoint(**endpoint) for endpoint in response.data]
+
+    def create(
+        self,
+        *,
+        model: str,
+        hardware: str,
+        min_replicas: int,
+        max_replicas: int,
+        display_name: Optional[str] = None,
+        disable_prompt_cache: bool = False,
+        disable_speculative_decoding: bool = False,
+        state: Literal["STARTED", "STOPPED"] = "STARTED",
+    ) -> DedicatedEndpoint:
+        """
+        Create a new dedicated endpoint.
+
+        Args:
+            model (str): The model to deploy on this endpoint
+            hardware (str): The hardware configuration to use for this endpoint
+            min_replicas (int): The minimum number of replicas to maintain
+            max_replicas (int): The maximum number of replicas to scale up to
+            display_name (str, optional): A human-readable name for the endpoint
+            disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
+            disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
+            state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+            "model": model,
+            "hardware": hardware,
+            "autoscaling": {
+                "min_replicas": min_replicas,
+                "max_replicas": max_replicas,
+            },
+            "disable_prompt_cache": disable_prompt_cache,
+            "disable_speculative_decoding": disable_speculative_decoding,
+            "state": state,
+        }
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="POST",
+                url="endpoints",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    def get(self, endpoint_id: str) -> DedicatedEndpoint:
+        """
+        Get details of a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to retrieve
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    def delete(self, endpoint_id: str) -> None:
+        """
+        Delete a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to delete
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        requestor.request(
+            options=TogetherRequest(
+                method="DELETE",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+    def update(
+        self,
+        endpoint_id: str,
+        *,
+        min_replicas: Optional[int] = None,
+        max_replicas: Optional[int] = None,
+        state: Optional[Literal["STARTED", "STOPPED"]] = None,
+        display_name: Optional[str] = None,
+    ) -> DedicatedEndpoint:
+        """
+        Update an endpoint's configuration.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to update
+            min_replicas (int, optional): The minimum number of replicas to maintain
+            max_replicas (int, optional): The maximum number of replicas to scale up to
+            state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
+            display_name (str, optional): A human-readable name for the endpoint
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, Dict[str, int]]] = {}
+
+        if min_replicas is not None or max_replicas is not None:
+            current_min = min_replicas
+            current_max = max_replicas
+            if current_min is None or current_max is None:
+                # Get current values if only one is specified
+                current = self.get(endpoint_id=endpoint_id)
+                current_min = current_min or current.autoscaling.min_replicas
+                current_max = current_max or current.autoscaling.max_replicas
+            data["autoscaling"] = {
+                "min_replicas": current_min,
+                "max_replicas": current_max,
+            }
+
+        if state is not None:
+            data["state"] = state
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="PATCH",
+                url=f"endpoints/{endpoint_id}",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]:
+        """
+        List available hardware configurations.
+
+        Args:
+            model (str, optional): Filter hardware configurations by model compatibility. When provided,
+                the response includes availability status for each compatible configuration.
+
+        Returns:
+            List[HardwareWithStatus]: List of hardware configurations with their status
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if model is not None:
+            params["model"] = model
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="hardware",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, dict)
+        assert isinstance(response.data["data"], list)
+
+        return [HardwareWithStatus(**item) for item in response.data["data"]]
+
+
+class AsyncEndpoints:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def list(
+        self, type: Optional[Literal["dedicated", "serverless"]] = None
+    ) -> List[ListEndpoint]:
+        """
+        List all endpoints, can be filtered by type.
+
+        Args:
+            type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
+
+        Returns:
+            List[ListEndpoint]: List of endpoint objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if type is not None:
+            params["type"] = type
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="endpoints",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        return [ListEndpoint(**endpoint) for endpoint in response.data]
+
+    async def create(
+        self,
+        *,
+        model: str,
+        hardware: str,
+        min_replicas: int,
+        max_replicas: int,
+        display_name: Optional[str] = None,
+        disable_prompt_cache: bool = False,
+        disable_speculative_decoding: bool = False,
+        state: Literal["STARTED", "STOPPED"] = "STARTED",
+    ) -> DedicatedEndpoint:
+        """
+        Create a new dedicated endpoint.
+
+        Args:
+            model (str): The model to deploy on this endpoint
+            hardware (str): The hardware configuration to use for this endpoint
+            min_replicas (int): The minimum number of replicas to maintain
+            max_replicas (int): The maximum number of replicas to scale up to
+            display_name (str, optional): A human-readable name for the endpoint
+            disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
+            disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
+            state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+            "model": model,
+            "hardware": hardware,
+            "autoscaling": {
+                "min_replicas": min_replicas,
+                "max_replicas": max_replicas,
+            },
+            "disable_prompt_cache": disable_prompt_cache,
+            "disable_speculative_decoding": disable_speculative_decoding,
+            "state": state,
+        }
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="POST",
+                url="endpoints",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    async def get(self, endpoint_id: str) -> DedicatedEndpoint:
+        """
+        Get details of a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to retrieve
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    async def delete(self, endpoint_id: str) -> None:
+        """
+        Delete a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to delete
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        await requestor.arequest(
+            options=TogetherRequest(
+                method="DELETE",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+    async def update(
+        self,
+        endpoint_id: str,
+        *,
+        min_replicas: Optional[int] = None,
+        max_replicas: Optional[int] = None,
+        state: Optional[Literal["STARTED", "STOPPED"]] = None,
+        display_name: Optional[str] = None,
+    ) -> DedicatedEndpoint:
+        """
+        Update an endpoint's configuration.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to update
+            min_replicas (int, optional): The minimum number of replicas to maintain
+            max_replicas (int, optional): The maximum number of replicas to scale up to
+            state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
+            display_name (str, optional): A human-readable name for the endpoint
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, Dict[str, int]]] = {}
+
+        if min_replicas is not None or max_replicas is not None:
+            current_min = min_replicas
+            current_max = max_replicas
+            if current_min is None or current_max is None:
+                # Get current values if only one is specified
+                current = await self.get(endpoint_id=endpoint_id)
+                current_min = current_min or current.autoscaling.min_replicas
+                current_max = current_max or current.autoscaling.max_replicas
+            data["autoscaling"] = {
+                "min_replicas": current_min,
+                "max_replicas": current_max,
+            }
+
+        if state is not None:
+            data["state"] = state
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="PATCH",
+                url=f"endpoints/{endpoint_id}",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    async def list_hardware(
+        self, model: Optional[str] = None
+    ) -> List[HardwareWithStatus]:
+        """
+        List available hardware configurations.
+
+        Args:
+            model (str, optional): Filter hardware configurations by model compatibility. When provided,
+                the response includes availability status for each compatible configuration.
+
+        Returns:
+            List[HardwareWithStatus]: List of hardware configurations with their status
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if model is not None:
+            params["model"] = model
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="hardware",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, dict)
+        assert isinstance(response.data["data"], list)
+
+        return [HardwareWithStatus(**item) for item in response.data["data"]]
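
The resource maps directly onto the REST surface (GET/POST endpoints, GET/PATCH/DELETE endpoints/{id}, GET hardware). A minimal end-to-end sketch with the sync client; the model name and replica counts are illustrative:

from together import Together

client = Together()

# Pick a hardware configuration that is currently available for the model.
available = [
    hw
    for hw in client.endpoints.list_hardware("mistralai/Mixtral-8x7B-Instruct-v0.1")
    if hw.availability is not None and hw.availability.status == "available"
]

endpoint = client.endpoints.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    hardware=available[0].id,
    min_replicas=1,
    max_replicas=2,
)

# update() fetches the current autoscaling values when only one bound is given.
client.endpoints.update(endpoint.id, max_replicas=3)
client.endpoints.update(endpoint.id, state="STOPPED")
client.endpoints.delete(endpoint.id)
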
together/types/__init__.py
CHANGED
@@ -1,4 +1,13 @@
 from together.types.abstract import TogetherClient
+from together.types.audio_speech import (
+    AudioLanguage,
+    AudioResponseEncoding,
+    AudioResponseFormat,
+    AudioSpeechRequest,
+    AudioSpeechStreamChunk,
+    AudioSpeechStreamEvent,
+    AudioSpeechStreamResponse,
+)
 from together.types.chat_completions import (
     ChatCompletionChunk,
     ChatCompletionRequest,
@@ -11,6 +20,7 @@ from together.types.completions import (
     CompletionResponse,
 )
 from together.types.embeddings import EmbeddingRequest, EmbeddingResponse
+from together.types.endpoints import Autoscaling, DedicatedEndpoint, ListEndpoint
 from together.types.files import (
     FileDeleteResponse,
     FileList,
@@ -22,35 +32,21 @@ from together.types.files import (
 )
 from together.types.finetune import (
     FinetuneDownloadResult,
+    FinetuneLinearLRSchedulerArgs,
     FinetuneList,
     FinetuneListEvents,
+    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
+    FinetuneTrainingLimits,
     FullTrainingType,
     LoRATrainingType,
     TrainingType,
-    FinetuneTrainingLimits,
-    FinetuneLRScheduler,
-    FinetuneLinearLRSchedulerArgs,
-)
-from together.types.images import (
-    ImageRequest,
-    ImageResponse,
 )
+from together.types.images import ImageRequest, ImageResponse
 from together.types.models import ModelObject
-from together.types.rerank import (
-    RerankRequest,
-    RerankResponse,
-)
-from together.types.audio_speech import (
-    AudioSpeechRequest,
-    AudioResponseFormat,
-    AudioLanguage,
-    AudioResponseEncoding,
-    AudioSpeechStreamChunk,
-    AudioSpeechStreamEvent,
-    AudioSpeechStreamResponse,
-)
+from together.types.rerank import RerankRequest, RerankResponse
+

 __all__ = [
     "TogetherClient",
@@ -93,4 +89,7 @@ __all__ = [
     "AudioSpeechStreamChunk",
     "AudioSpeechStreamEvent",
     "AudioSpeechStreamResponse",
+    "DedicatedEndpoint",
+    "ListEndpoint",
+    "Autoscaling",
 ]
together/types/endpoints.py
ADDED

@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Dict, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+
+
+class TogetherJSONModel(BaseModel):
+    """Base model with JSON serialization support."""
+
+    def model_dump(self, **kwargs: Any) -> Dict[str, Any]:
+        exclude_none = kwargs.pop("exclude_none", True)
+        data = super().model_dump(exclude_none=exclude_none, **kwargs)
+
+        # Convert datetime objects to ISO format strings
+        for key, value in data.items():
+            if isinstance(value, datetime):
+                data[key] = value.isoformat()
+
+        return data
+
+
+class Autoscaling(TogetherJSONModel):
+    """Configuration for automatic scaling of replicas based on demand."""
+
+    min_replicas: int = Field(
+        description="The minimum number of replicas to maintain, even when there is no load"
+    )
+    max_replicas: int = Field(
+        description="The maximum number of replicas to scale up to under load"
+    )
+
+
+class EndpointPricing(TogetherJSONModel):
+    """Pricing details for using an endpoint."""
+
+    cents_per_minute: float = Field(
+        description="Cost per minute of endpoint uptime in cents"
+    )
+
+
+class HardwareSpec(TogetherJSONModel):
+    """Detailed specifications of a hardware configuration."""
+
+    gpu_type: str = Field(description="The type/model of GPU")
+    gpu_link: str = Field(description="The GPU interconnect technology")
+    gpu_memory: Union[float, int] = Field(description="Amount of GPU memory in GB")
+    gpu_count: int = Field(description="Number of GPUs in this configuration")
+
+
+class HardwareAvailability(TogetherJSONModel):
+    """Indicates the current availability status of a hardware configuration."""
+
+    status: Literal["available", "unavailable", "insufficient"] = Field(
+        description="The availability status of the hardware configuration"
+    )
+
+
+class HardwareWithStatus(TogetherJSONModel):
+    """Hardware configuration details with optional availability status."""
+
+    object: Literal["hardware"] = Field(description="The type of object")
+    id: str = Field(description="Unique identifier for the hardware configuration")
+    pricing: EndpointPricing = Field(
+        description="Pricing details for this hardware configuration"
+    )
+    specs: HardwareSpec = Field(description="Detailed specifications of this hardware")
+    availability: Optional[HardwareAvailability] = Field(
+        default=None,
+        description="Current availability status of this hardware configuration",
+    )
+    updated_at: datetime = Field(
+        description="Timestamp of when the hardware status was last updated"
+    )
+
+
+class BaseEndpoint(TogetherJSONModel):
+    """Base class for endpoint models with common fields."""
+
+    object: Literal["endpoint"] = Field(description="The type of object")
+    id: Optional[str] = Field(
+        default=None, description="Unique identifier for the endpoint"
+    )
+    name: str = Field(description="System name for the endpoint")
+    model: str = Field(description="The model deployed on this endpoint")
+    type: str = Field(description="The type of endpoint")
+    owner: str = Field(description="The owner of this endpoint")
+    state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"] = (
+        Field(description="Current state of the endpoint")
+    )
+    created_at: datetime = Field(description="Timestamp when the endpoint was created")
+
+
+class ListEndpoint(BaseEndpoint):
+    """Details about an endpoint when listed via the list endpoint."""
+
+    type: Literal["dedicated", "serverless"] = Field(description="The type of endpoint")
+
+
+class DedicatedEndpoint(BaseEndpoint):
+    """Details about a dedicated endpoint deployment."""
+
+    id: str = Field(description="Unique identifier for the endpoint")
+    type: Literal["dedicated"] = Field(description="The type of endpoint")
+    display_name: str = Field(description="Human-readable name for the endpoint")
+    hardware: str = Field(
+        description="The hardware configuration used for this endpoint"
+    )
+    autoscaling: Autoscaling = Field(
+        description="Configuration for automatic scaling of the endpoint"
+    )
+
+
+__all__ = [
+    "DedicatedEndpoint",
+    "ListEndpoint",
+    "Autoscaling",
+    "EndpointPricing",
+    "HardwareSpec",
+    "HardwareAvailability",
+    "HardwareWithStatus",
+]
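
A small sketch of the serialization behavior these models inherit from `TogetherJSONModel`: `None` fields are dropped and `datetime` values are converted to ISO-8601 strings (the field values below are illustrative):

from datetime import datetime, timezone

from together.types.endpoints import EndpointPricing, HardwareSpec, HardwareWithStatus

hw = HardwareWithStatus(
    object="hardware",
    id="2x_nvidia_a100_80gb_sxm",
    pricing=EndpointPricing(cents_per_minute=5.0),
    specs=HardwareSpec(gpu_type="a100", gpu_link="sxm", gpu_memory=80, gpu_count=2),
    updated_at=datetime.now(timezone.utc),
)

# availability was left as None, so it is omitted from the dump;
# updated_at comes back as an ISO-8601 string.
print(hw.model_dump())
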
{together-1.3.14.dist-info → together-1.4.1.dist-info}/METADATA
CHANGED

@@ -1,15 +1,14 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.3.14
+Version: 1.4.1
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
 Author-email: support@together.ai
-Requires-Python: >=3.8,<4.0
+Requires-Python: >=3.9,<4.0
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -21,7 +20,7 @@ Requires-Dist: eval-type-backport (>=0.1.3,<0.3.0)
 Requires-Dist: filelock (>=3.13.1,<4.0.0)
 Requires-Dist: numpy (>=1.23.5) ; python_version < "3.12"
 Requires-Dist: numpy (>=1.26.0) ; python_version >= "3.12"
-Requires-Dist: pillow (>=
+Requires-Dist: pillow (>=11.1.0,<12.0.0)
 Requires-Dist: pyarrow (>=10.0.1)
 Requires-Dist: pydantic (>=2.6.3,<3.0.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
{together-1.3.14.dist-info → together-1.4.1.dist-info}/RECORD
CHANGED

@@ -1,19 +1,20 @@
 together/__init__.py,sha256=B8T7ybZ7D6jJNRTuFDVjOFlImCNag8tNZXpZdXz7xNM,1530
 together/abstract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-together/abstract/api_requestor.py,sha256=
+together/abstract/api_requestor.py,sha256=kKVxkJqpd1CQ4t9Ky4kngkvlzZh1xoDN0PBAM8mGW_Q,25948
 together/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 together/cli/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 together/cli/api/chat.py,sha256=2PHRb-9T-lUEKhUJFtc7SxJv3shCVx40gq_8pzfsewM,9234
 together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5XMc,4263
+together/cli/api/endpoints.py,sha256=LUIuK4DLs-VYor1nvOPzUNq0WeA7nIgIBHBD5Erdd5I,12470
 together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
 together/cli/api/finetune.py,sha256=FWHENGE86oLNCVXEJN9AAU3FlSTtnO96aShhB2xVrsk,12923
 together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
 together/cli/api/models.py,sha256=xWEzu8ZpxM_Pz9KEjRPRVuv_v22RayYZ4QcgiezT5tE,1126
 together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
-together/cli/cli.py,sha256=
-together/client.py,sha256=
+together/cli/cli.py,sha256=YCDzbXpC5is0rs2PEkUPrIhYuzdyrihQ8GVR_TlDv5s,2054
+together/client.py,sha256=vOe9NOgDyDlrT5ppvNfJGzdOHnMWEPmJX2RbXUQXKno,5081
 together/constants.py,sha256=0L2R8ftvls9eywQstSsrQcpHIkYsOo473vGw0okArN4,1359
-together/error.py,sha256=
+together/error.py,sha256=HU6247CyzCFjaxL9A0XYbXZ6fY_ebRg0FEYjI4Skogs,5515
 together/filemanager.py,sha256=QHhBn73oVFdgUpSYXYLmJzHJ9c5wYEMJC0ur6ZgDeYo,11269
 together/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 together/legacy/base.py,sha256=ehrX1SCfRbK5OA83wL1q7-tfF-yuZOUxzjxYfFtdvvQ,727
@@ -23,26 +24,28 @@ together/legacy/files.py,sha256=qmAqMiNTPWb6WvLV5Tsv6kxGRfQ31q7OkHZNFwkw8v0,4082
 together/legacy/finetune.py,sha256=LENaqegeb1PszXDbAhTNPro7T3isz6X_IICIOKH7dKE,5114
 together/legacy/images.py,sha256=bJJRs-6C7-NexPyaeyHiYlHOU51yls5-QAiqtO4xrZU,626
 together/legacy/models.py,sha256=85ZN9Ids_FjdYNDRv5k7sgrtVWPKPHqkDplORtVUGHg,1087
-together/resources/__init__.py,sha256=
+together/resources/__init__.py,sha256=OQ8tW9mUIX0Ezk0wvYEnnEym6wGsjBKgXFLU9Ffgb-o,984
 together/resources/audio/__init__.py,sha256=e7xp0Lkp_nMAHXcuFHS7dLXP_YqTPMMZIilW1TW_sAI,551
 together/resources/audio/speech.py,sha256=81ib_gIo-Rxoaipx2Pi9ZsKnOTjeFPwSlBrcUkyX5xk,5211
 together/resources/chat/__init__.py,sha256=RsTptdP8MeGjcdIjze896-J27cRvCbUoMft0X2BVlQ8,617
 together/resources/chat/completions.py,sha256=jYiNZsWa8RyEacL0VgxWj1egJ857oU4nxIY8uqGHcaU,14459
 together/resources/completions.py,sha256=5Wa-ZjPCxRcam6CDe7KgGYlTA7yJZMmd5TrRgGCL_ug,11726
 together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnKoxs,2675
+together/resources/endpoints.py,sha256=tk_Ih94F9CXDmdRqsmOHS4yedmyxiUfIjFodh6pbCl8,15865
 together/resources/files.py,sha256=bnPbaF25e4InBRPvHwXHXT-oSX1Z1sZRsnQW5wq82U4,4990
 together/resources/finetune.py,sha256=0UiN2jxxV_lQ9QSFKDjAioXVgPCIzb7biIJbcQj1oq4,26998
 together/resources/images.py,sha256=LQUjKPaFxWTqOAPnyF1Pp7Rz4NLOYhmoKwshpYiprEM,4923
 together/resources/models.py,sha256=2dtHhXAqTDOOpwSbYLzWcKTC0-m2Szlb7LDYvp7Jr4w,1786
 together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
 together/together_response.py,sha256=a3dgKMPDrlfKQwxYENfNt2T4l2vSZxRWMixhHSy-q3E,1308
-together/types/__init__.py,sha256=
+together/types/__init__.py,sha256=cQUr9zEoRCFYhoI3ECU0Zj5APUiFsG8Hpo3VOopVPDE,2406
 together/types/abstract.py,sha256=1lFQI_3WjsR_t1128AeKW0aTk6EiM6Gh1J3ZuyLLPao,642
 together/types/audio_speech.py,sha256=jlj8BZf3dkIDARF1P11fuenVLj4try8Yx4RN-EAkhOU,2609
 together/types/chat_completions.py,sha256=tIHQzB1N1DsUl3WojsrfErqxVmcI_eweGVp_gbf6dp8,4914
 together/types/common.py,sha256=kxZ-N9xtBsGYZBmbIWnZ0rfT3Pn8PFB7sAbp3iv96pw,1525
 together/types/completions.py,sha256=o3FR5ixsTUj-a3pmOUzbSQg-hESVhpqrC9UD__VCqr4,2971
 together/types/embeddings.py,sha256=J7grkYYn7xhqeKaBO2T-8XQRtHhkzYzymovtGdIUK5A,751
+together/types/endpoints.py,sha256=ePV4ogBxKSVRwdYm2lTpj6n-EB2FYtc6Bva9fkZGKP8,4385
 together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
 together/types/files.py,sha256=-rEUfsV6f2vZB9NrFxT4_933ubsDIUNkPB-3OlOFk4A,1954
 together/types/finetune.py,sha256=u4rZne7dd0F3jfQ9iXxIVG405kfr65rlJiEMkEZrfWY,9052
@@ -55,8 +58,8 @@ together/utils/api_helpers.py,sha256=RSF7SRhbjHzroMOSWAXscflByM1r1ta_1SpxkAT22iE
 together/utils/files.py,sha256=4SxxrTYfVoWvsD0n7O73LVjexAxYCWvXUBgmzrJY5-s,14169
 together/utils/tools.py,sha256=3-lXWP3cBCzOVSZg9tr5zOT1jaVeKAKVWxO2fcXZTh8,1788
 together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
-together-1.3.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-together-1.3.14.dist-info/METADATA,sha256=
-together-1.3.14.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-together-1.3.14.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
-together-1.3.14.dist-info/RECORD,,
+together-1.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+together-1.4.1.dist-info/METADATA,sha256=7TxIWf52LkVaNwfvnldCWkyHXJ27sOMBhn7g0OVhmUI,12649
+together-1.4.1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+together-1.4.1.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+together-1.4.1.dist-info/RECORD,,
{together-1.3.14.dist-info → together-1.4.1.dist-info}/LICENSE
File without changes

{together-1.3.14.dist-info → together-1.4.1.dist-info}/WHEEL
File without changes

{together-1.3.14.dist-info → together-1.4.1.dist-info}/entry_points.txt
File without changes