together-1.4.5.tar.gz → together-1.5.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {together-1.4.5 → together-1.5.2}/PKG-INFO +3 -4
- {together-1.4.5 → together-1.5.2}/README.md +1 -1
- {together-1.4.5 → together-1.5.2}/pyproject.toml +2 -2
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/endpoints.py +20 -1
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/finetune.py +46 -9
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/models.py +20 -7
- {together-1.4.5 → together-1.5.2}/src/together/resources/endpoints.py +24 -4
- {together-1.4.5 → together-1.5.2}/src/together/resources/finetune.py +70 -13
- together-1.5.2/src/together/resources/models.py +134 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/__init__.py +7 -1
- {together-1.4.5 → together-1.5.2}/src/together/types/finetune.py +21 -7
- together-1.4.5/src/together/resources/models.py +0 -75
- {together-1.4.5 → together-1.5.2}/LICENSE +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/abstract/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/abstract/api_requestor.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/chat.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/completions.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/files.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/images.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/api/utils.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/cli/cli.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/client.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/constants.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/error.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/filemanager.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/base.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/complete.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/embeddings.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/files.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/finetune.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/images.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/legacy/models.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/audio/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/audio/speech.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/chat/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/chat/completions.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/completions.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/embeddings.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/files.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/images.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/resources/rerank.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/together_response.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/abstract.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/audio_speech.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/chat_completions.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/common.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/completions.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/embeddings.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/endpoints.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/error.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/files.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/images.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/models.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/types/rerank.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/utils/__init__.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/utils/_log.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/utils/api_helpers.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/utils/files.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/utils/tools.py +0 -0
- {together-1.4.5 → together-1.5.2}/src/together/version.py +0 -0
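Everything below ships in a single release; to reproduce the new behavior locally it should be enough to upgrade the package (the exact pin is only an example):

    pip install --upgrade together==1.5.2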
{together-1.4.5 → together-1.5.2}/PKG-INFO

@@ -1,15 +1,14 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.4.5
+Version: 1.5.2
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
 Author-email: support@together.ai
-Requires-Python: >=3.9,<4.0
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -45,7 +44,7 @@ Description-Content-Type: text/markdown
 [](https://discord.com/invite/9Rk6sSeWEG)
 [](https://twitter.com/togethercompute)
 
-The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.9+ applications with easy to use synchronous and asynchronous clients.
+The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.
 
 
 
{together-1.4.5 → together-1.5.2}/README.md

@@ -10,7 +10,7 @@
 [](https://discord.com/invite/9Rk6sSeWEG)
 [](https://twitter.com/togethercompute)
 
-The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.9+ applications with easy to use synchronous and asynchronous clients.
+The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.
 
 
 
{together-1.4.5 → together-1.5.2}/pyproject.toml

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.4.5"
+version = "1.5.2"
 authors = [
     "Together AI <support@together.ai>"
 ]
@@ -28,7 +28,7 @@ repository = "https://github.com/togethercomputer/together-python"
 homepage = "https://github.com/togethercomputer/together-python"
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.10"
 typer = ">=0.9,<0.16"
 requests = "^2.31.0"
 rich = "^13.8.1"
{together-1.4.5 → together-1.5.2}/src/together/cli/api/endpoints.py

@@ -127,6 +127,11 @@ def endpoints(ctx: click.Context) -> None:
     is_flag=True,
     help="Create the endpoint in STOPPED state instead of auto-starting it",
 )
+@click.option(
+    "--inactive-timeout",
+    type=int,
+    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
+)
 @click.option(
     "--wait",
     is_flag=True,
@@ -146,6 +151,7 @@ def create(
     no_prompt_cache: bool,
     no_speculative_decoding: bool,
     no_auto_start: bool,
+    inactive_timeout: int | None,
     wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
@@ -170,6 +176,7 @@ def create(
             disable_prompt_cache=no_prompt_cache,
             disable_speculative_decoding=no_speculative_decoding,
             state="STOPPED" if no_auto_start else "STARTED",
+            inactive_timeout=inactive_timeout,
         )
     except InvalidRequestError as e:
         print_api_error(e)
@@ -194,6 +201,8 @@ def create(
         click.echo(" Speculative decoding: disabled", err=True)
     if no_auto_start:
         click.echo(" Auto-start: disabled", err=True)
+    if inactive_timeout is not None:
+        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
 
     click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
 
@@ -371,6 +380,11 @@ def list(
     type=int,
     help="New maximum number of replicas to scale up to",
 )
+@click.option(
+    "--inactive-timeout",
+    type=int,
+    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
+)
 @click.pass_obj
 @handle_api_errors
 def update(
@@ -379,9 +393,10 @@ def update(
     display_name: str | None,
     min_replicas: int | None,
     max_replicas: int | None,
+    inactive_timeout: int | None,
 ) -> None:
     """Update a dedicated inference endpoint's configuration."""
-    if not any([display_name, min_replicas, max_replicas]):
+    if not any([display_name, min_replicas, max_replicas, inactive_timeout]):
         click.echo("Error: At least one update option must be specified", err=True)
         sys.exit(1)
 
@@ -400,6 +415,8 @@ def update(
     if min_replicas is not None and max_replicas is not None:
         kwargs["min_replicas"] = min_replicas
         kwargs["max_replicas"] = max_replicas
+    if inactive_timeout is not None:
+        kwargs["inactive_timeout"] = inactive_timeout
 
     _response = client.endpoints.update(endpoint_id, **kwargs)
 
@@ -410,6 +427,8 @@ def update(
     if min_replicas is not None and max_replicas is not None:
         click.echo(f" Min replicas: {min_replicas}", err=True)
         click.echo(f" Max replicas: {max_replicas}", err=True)
+    if inactive_timeout is not None:
+        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
 
     click.echo("Successfully updated endpoint", err=True)
     click.echo(endpoint_id)
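The net effect of the hunks above is a new --inactive-timeout option on both the create and update endpoint commands. A minimal sketch of the flag in use, assuming the CLI entry point is `together` and eliding the other options create needs (the model name and endpoint id are placeholders):

    # create an endpoint that is stopped automatically after 30 idle minutes
    together endpoints create --model <model-name> --inactive-timeout 30

    # later, turn the automatic stop off for an existing endpoint
    together endpoints update <endpoint-id> --inactive-timeout 0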
{together-1.4.5 → together-1.5.2}/src/together/cli/api/finetune.py

@@ -58,30 +58,50 @@ def fine_tuning(ctx: click.Context) -> None:
 @fine_tuning.command()
 @click.pass_context
 @click.option(
-    "--training-file", type=str, required=True, help="Training file ID from Files API"
+    "--training-file",
+    "-t",
+    type=str,
+    required=True,
+    help="Training file ID from Files API",
+)
+@click.option("--model", "-m", type=str, help="Base model name")
+@click.option(
+    "--n-epochs", "-ne", type=int, default=1, help="Number of epochs to train for"
 )
-@click.option("--model", type=str, required=True, help="Base model name")
-@click.option("--n-epochs", type=int, default=1, help="Number of epochs to train for")
 @click.option(
     "--validation-file", type=str, default="", help="Validation file ID from Files API"
 )
 @click.option("--n-evals", type=int, default=0, help="Number of evaluation loops")
 @click.option(
-    "--n-checkpoints", type=int, default=1, help="Number of checkpoints to save"
+    "--n-checkpoints", "-c", type=int, default=1, help="Number of checkpoints to save"
+)
+@click.option(
+    "--batch-size", "-b", type=INT_WITH_MAX, default="max", help="Train batch size"
+)
+@click.option("--learning-rate", "-lr", type=float, default=1e-5, help="Learning rate")
+@click.option(
+    "--lr-scheduler-type",
+    type=click.Choice(["linear", "cosine"]),
+    default="linear",
+    help="Learning rate scheduler type",
 )
-@click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
-@click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
 @click.option(
     "--min-lr-ratio",
     type=float,
     default=0.0,
     help="The ratio of the final learning rate to the peak learning rate",
 )
+@click.option(
+    "--scheduler-num-cycles",
+    type=float,
+    default=0.5,
+    help="Number or fraction of cycles for the cosine learning rate scheduler.",
+)
 @click.option(
     "--warmup-ratio",
     type=float,
     default=0.0,
-    help="Warmup ratio for learning rate scheduler.",
+    help="Warmup ratio for the learning rate scheduler.",
 )
 @click.option(
     "--max-grad-norm",
@@ -123,7 +143,11 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
-    "--suffix", type=str, default=None, help="Suffix for the fine-tuned model name"
+    "--suffix",
+    "-s",
+    type=str,
+    default=None,
+    help="Suffix for the fine-tuned model name",
 )
 @click.option("--wandb-api-key", type=str, default=None, help="Wandb API key")
 @click.option("--wandb-base-url", type=str, default=None, help="Wandb base URL")
@@ -162,7 +186,9 @@ def create(
     n_checkpoints: int,
     batch_size: int | Literal["max"],
     learning_rate: float,
+    lr_scheduler_type: Literal["linear", "cosine"],
     min_lr_ratio: float,
+    scheduler_num_cycles: float,
     warmup_ratio: float,
     max_grad_norm: float,
     weight_decay: float,
@@ -194,7 +220,9 @@ def create(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        lr_scheduler_type=lr_scheduler_type,
         min_lr_ratio=min_lr_ratio,
+        scheduler_num_cycles=scheduler_num_cycles,
         warmup_ratio=warmup_ratio,
         max_grad_norm=max_grad_norm,
         weight_decay=weight_decay,
@@ -214,8 +242,15 @@ def create(
         from_checkpoint=from_checkpoint,
     )
 
+    if model is None and from_checkpoint is None:
+        raise click.BadParameter("You must specify either a model or a checkpoint")
+
+    model_name = model
+    if from_checkpoint is not None:
+        model_name = from_checkpoint.split(":")[0]
+
     model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits(
-        model=model
+        model=model_name
     )
 
     if lora:
@@ -411,6 +446,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
 @click.argument("fine_tune_id", type=str, required=True)
 @click.option(
     "--output_dir",
+    "-o",
     type=click.Path(exists=True, file_okay=False, resolve_path=True),
     required=False,
     default=None,
@@ -418,6 +454,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
 )
 @click.option(
     "--checkpoint-step",
+    "-s",
     type=int,
     required=False,
     default=None,
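Beyond the new short flags (-t, -m, -ne, -c, -b, -lr, -s, -o), these hunks make --model optional so a job can start from --from-checkpoint instead, and they expose the cosine scheduler. A sketch of one possible invocation, assuming the command group is registered as fine-tuning (the file ID and model name are placeholders):

    together fine-tuning create \
        -t file-1234 \
        -m <base-model-name> \
        --lr-scheduler-type cosine \
        --scheduler-num-cycles 0.5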
{together-1.4.5 → together-1.5.2}/src/together/cli/api/models.py

@@ -1,4 +1,4 @@
-
+import json as json_lib
 
 import click
 from tabulate import tabulate
@@ -15,12 +15,22 @@ def models(ctx: click.Context) -> None:
 
 
 @models.command()
+@click.option(
+    "--type",
+    type=click.Choice(["dedicated"]),
+    help="Filter models by type (dedicated: models that can be deployed as dedicated endpoints)",
+)
+@click.option(
+    "--json",
+    is_flag=True,
+    help="Output in JSON format",
+)
 @click.pass_context
-def list(ctx: click.Context) -> None:
+def list(ctx: click.Context, type: str | None, json: bool) -> None:
     """List models"""
     client: Together = ctx.obj
 
-    response = client.models.list()
+    response = client.models.list(dedicated=(type == "dedicated"))
 
     display_list = []
 
@@ -28,15 +38,18 @@ def list(ctx: click.Context) -> None:
     for model in response:
         display_list.append(
             {
-                "ID":
-                "Name":
+                "ID": model.id,
+                "Name": model.display_name,
                 "Organization": model.organization,
                 "Type": model.type,
                 "Context Length": model.context_length,
-                "License":
+                "License": model.license,
                 "Input per 1M token": model.pricing.input,
                 "Output per 1M token": model.pricing.output,
             }
         )
 
-    click.echo(tabulate(display_list, headers="keys", tablefmt="plain"))
+    if json:
+        click.echo(json_lib.dumps(display_list, indent=2))
+    else:
+        click.echo(tabulate(display_list, headers="keys", tablefmt="plain"))
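With these changes, models list gains a dedicated filter and machine-readable output:

    # tabular list of models that can be deployed as dedicated endpoints
    together models list --type dedicated

    # the same list as JSON
    together models list --type dedicated --json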
{together-1.4.5 → together-1.5.2}/src/together/resources/endpoints.py

@@ -59,6 +59,7 @@ class Endpoints:
         disable_prompt_cache: bool = False,
         disable_speculative_decoding: bool = False,
         state: Literal["STARTED", "STOPPED"] = "STARTED",
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Create a new dedicated endpoint.
@@ -72,6 +73,7 @@ class Endpoints:
             disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
             disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
             state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -80,7 +82,7 @@ class Endpoints:
             client=self._client,
         )
 
-        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+        data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
             "model": model,
             "hardware": hardware,
             "autoscaling": {
@@ -95,6 +97,9 @@ class Endpoints:
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = requestor.request(
             options=TogetherRequest(
                 method="POST",
@@ -161,6 +166,7 @@ class Endpoints:
         max_replicas: Optional[int] = None,
         state: Optional[Literal["STARTED", "STOPPED"]] = None,
        display_name: Optional[str] = None,
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Update an endpoint's configuration.
@@ -171,6 +177,7 @@ class Endpoints:
             max_replicas (int, optional): The maximum number of replicas to scale up to
             state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
             display_name (str, optional): A human-readable name for the endpoint
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -179,7 +186,7 @@ class Endpoints:
             client=self._client,
         )
 
-        data: Dict[str, Union[str, Dict[str, int]]] = {}
+        data: Dict[str, Union[str, Dict[str, int], int]] = {}
 
         if min_replicas is not None or max_replicas is not None:
             current_min = min_replicas
@@ -200,6 +207,9 @@ class Endpoints:
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = requestor.request(
             options=TogetherRequest(
                 method="PATCH",
@@ -297,6 +307,7 @@ class AsyncEndpoints:
         disable_prompt_cache: bool = False,
         disable_speculative_decoding: bool = False,
         state: Literal["STARTED", "STOPPED"] = "STARTED",
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Create a new dedicated endpoint.
@@ -310,6 +321,7 @@ class AsyncEndpoints:
             disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
             disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
             state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -318,7 +330,7 @@ class AsyncEndpoints:
             client=self._client,
         )
 
-        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+        data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
             "model": model,
             "hardware": hardware,
             "autoscaling": {
@@ -333,6 +345,9 @@ class AsyncEndpoints:
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = await requestor.arequest(
             options=TogetherRequest(
                 method="POST",
@@ -399,6 +414,7 @@ class AsyncEndpoints:
         max_replicas: Optional[int] = None,
         state: Optional[Literal["STARTED", "STOPPED"]] = None,
         display_name: Optional[str] = None,
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Update an endpoint's configuration.
@@ -409,6 +425,7 @@ class AsyncEndpoints:
             max_replicas (int, optional): The maximum number of replicas to scale up to
             state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
             display_name (str, optional): A human-readable name for the endpoint
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -417,7 +434,7 @@ class AsyncEndpoints:
             client=self._client,
         )
 
-        data: Dict[str, Union[str, Dict[str, int]]] = {}
+        data: Dict[str, Union[str, Dict[str, int], int]] = {}
 
         if min_replicas is not None or max_replicas is not None:
             current_min = min_replicas
@@ -438,6 +455,9 @@ class AsyncEndpoints:
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = await requestor.arequest(
             options=TogetherRequest(
                 method="PATCH",
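On the library side, inactive_timeout flows from the method signatures into the request payload. A minimal sketch of the new parameter; the model and hardware values are placeholders, min_replicas/max_replicas are assumed from the autoscaling payload built above, and TOGETHER_API_KEY is assumed to be set:

    from together import Together

    client = Together()

    # create an endpoint that stops itself after 30 idle minutes
    endpoint = client.endpoints.create(
        model="<model-name>",      # placeholder
        hardware="<hardware-id>",  # placeholder
        min_replicas=1,            # assumed autoscaling arguments
        max_replicas=1,
        inactive_timeout=30,
    )

    # disable the timeout later without touching anything else
    client.endpoints.update(endpoint.id, inactive_timeout=0)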
{together-1.4.5 → together-1.5.2}/src/together/resources/finetune.py

@@ -22,7 +22,10 @@ from together.types import (
     TogetherRequest,
     TrainingType,
     FinetuneLRScheduler,
+    FinetuneLinearLRScheduler,
+    FinetuneCosineLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    FinetuneCosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
@@ -50,14 +53,16 @@ AVAILABLE_TRAINING_METHODS = {
 def createFinetuneRequest(
     model_limits: FinetuneTrainingLimits,
     training_file: str,
-    model: str,
+    model: str | None = None,
     n_epochs: int = 1,
     validation_file: str | None = "",
     n_evals: int | None = 0,
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
+    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
     min_lr_ratio: float = 0.0,
+    scheduler_num_cycles: float = 0.5,
     warmup_ratio: float = 0.0,
     max_grad_norm: float = 1.0,
     weight_decay: float = 0.0,
@@ -77,6 +82,11 @@ def createFinetuneRequest(
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
 
+    if model is not None and from_checkpoint is not None:
+        raise ValueError(
+            "You must specify either a model or a checkpoint to start a job from, not both"
+        )
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -129,10 +139,22 @@ def createFinetuneRequest(
             f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
         )
 
-    lrScheduler = FinetuneLRScheduler(
-        lr_scheduler_type="linear",
-        lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
-    )
+    # Default to generic lr scheduler
+    lrScheduler: FinetuneLRScheduler = FinetuneLRScheduler(lr_scheduler_type="linear")
+
+    if lr_scheduler_type == "cosine":
+        if scheduler_num_cycles <= 0.0:
+            raise ValueError("Number of cycles should be greater than 0")
+
+        lrScheduler = FinetuneCosineLRScheduler(
+            lr_scheduler_args=FinetuneCosineLRSchedulerArgs(
+                min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
+            ),
+        )
+    else:
+        lrScheduler = FinetuneLinearLRScheduler(
+            lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+        )
 
     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
     if training_method == "dpo":
@@ -237,14 +259,16 @@ class FineTuning:
         self,
         *,
         training_file: str,
-        model: str,
+        model: str | None = None,
         n_epochs: int = 1,
         validation_file: str | None = "",
         n_evals: int | None = 0,
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
+        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
         min_lr_ratio: float = 0.0,
+        scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -270,7 +294,7 @@ class FineTuning:
 
         Args:
             training_file (str): File-ID of a file uploaded to the Together API
-            model (str): Name of the base model to run fine-tune job on
+            model (str, optional): Name of the base model to run fine-tune job on
             n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
             validation file (str, optional): File ID of a file uploaded to the Together API for validation.
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -279,9 +303,11 @@ class FineTuning:
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
-            warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+            warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
             max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -320,12 +346,24 @@ class FineTuning:
             FinetuneResponse: Object containing information about fine-tuning job.
         """
 
+        if model is None and from_checkpoint is None:
+            raise ValueError("You must specify either a model or a checkpoint")
+
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
 
         if model_limits is None:
-            model_limits = self.get_model_limits(model=model)
+            # mypy doesn't understand that model or from_checkpoint is not None
+            if model is not None:
+                model_name = model
+            elif from_checkpoint is not None:
+                model_name = from_checkpoint.split(":")[0]
+            else:
+                # this branch is unreachable, but mypy doesn't know that
+                pass
+            model_limits = self.get_model_limits(model=model_name)
+
         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
             training_file=training_file,
@@ -336,7 +374,9 @@ class FineTuning:
             n_checkpoints=n_checkpoints,
             batch_size=batch_size,
             learning_rate=learning_rate,
+            lr_scheduler_type=lr_scheduler_type,
             min_lr_ratio=min_lr_ratio,
+            scheduler_num_cycles=scheduler_num_cycles,
             warmup_ratio=warmup_ratio,
             max_grad_norm=max_grad_norm,
             weight_decay=weight_decay,
@@ -610,14 +650,16 @@ class AsyncFineTuning:
         self,
         *,
         training_file: str,
-        model: str,
+        model: str | None = None,
         n_epochs: int = 1,
         validation_file: str | None = "",
         n_evals: int | None = 0,
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
+        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
         min_lr_ratio: float = 0.0,
+        scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -643,7 +685,7 @@ class AsyncFineTuning:
 
         Args:
             training_file (str): File-ID of a file uploaded to the Together API
-            model (str): Name of the base model to run fine-tune job on
+            model (str, optional): Name of the base model to run fine-tune job on
             n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
             validation file (str, optional): File ID of a file uploaded to the Together API for validation.
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -652,9 +694,11 @@ class AsyncFineTuning:
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
-            warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+            warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
             max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -693,12 +737,23 @@ class AsyncFineTuning:
             FinetuneResponse: Object containing information about fine-tuning job.
         """
 
+        if model is None and from_checkpoint is None:
+            raise ValueError("You must specify either a model or a checkpoint")
+
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
 
         if model_limits is None:
-            model_limits = await self.get_model_limits(model=model)
+            # mypy doesn't understand that model or from_checkpoint is not None
+            if model is not None:
+                model_name = model
+            elif from_checkpoint is not None:
+                model_name = from_checkpoint.split(":")[0]
+            else:
+                # this branch is unreachable, but mypy doesn't know that
+                pass
+            model_limits = await self.get_model_limits(model=model_name)
 
         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
@@ -710,7 +765,9 @@ class AsyncFineTuning:
             n_checkpoints=n_checkpoints,
             batch_size=batch_size,
             learning_rate=learning_rate,
+            lr_scheduler_type=lr_scheduler_type,
             min_lr_ratio=min_lr_ratio,
+            scheduler_num_cycles=scheduler_num_cycles,
             warmup_ratio=warmup_ratio,
             max_grad_norm=max_grad_norm,
             weight_decay=weight_decay,
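Two behavioral changes fall out of the hunks above: model and from_checkpoint are now mutually exclusive (exactly one must be given), and a cosine learning-rate schedule can be requested. A sketch with placeholder IDs:

    from together import Together

    client = Together()

    # cosine schedule; min_lr_ratio and num_cycles end up in FinetuneCosineLRSchedulerArgs
    job = client.fine_tuning.create(
        training_file="file-1234",   # placeholder file ID
        model="<base-model-name>",   # placeholder
        lr_scheduler_type="cosine",
        scheduler_num_cycles=0.5,
        min_lr_ratio=0.1,
    )

    # or resume from a checkpoint instead of naming a base model; the text
    # before the first ":" is used to look up model limits
    job = client.fine_tuning.create(
        training_file="file-1234",
        from_checkpoint="<checkpoint-id>",  # placeholder
    )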
together-1.5.2/src/together/resources/models.py (new file)

@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from typing import List
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    ModelObject,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class ModelsBase:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def _filter_dedicated_models(
+        self, models: List[ModelObject], dedicated_response: TogetherResponse
+    ) -> List[ModelObject]:
+        """
+        Filter models based on dedicated model response.
+
+        Args:
+            models (List[ModelObject]): List of all models
+            dedicated_response (TogetherResponse): Response from autoscale models endpoint
+
+        Returns:
+            List[ModelObject]: Filtered list of models
+        """
+        assert isinstance(dedicated_response.data, list)
+
+        # Create a set of dedicated model names for efficient lookup
+        dedicated_model_names = {model["name"] for model in dedicated_response.data}
+
+        # Filter models to only include those in dedicated_model_names
+        # Note: The model.id from ModelObject matches the name field in the autoscale response
+        return [model for model in models if model.id in dedicated_model_names]
+
+
+class Models(ModelsBase):
+    def list(
+        self,
+        dedicated: bool = False,
+    ) -> List[ModelObject]:
+        """
+        Method to return list of models on the API
+
+        Args:
+            dedicated (bool, optional): If True, returns only dedicated models. Defaults to False.
+
+        Returns:
+            List[ModelObject]: List of model objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="models",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        models = [ModelObject(**model) for model in response.data]
+
+        if dedicated:
+            # Get dedicated models
+            dedicated_response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="GET",
+                    url="autoscale/models",
+                ),
+                stream=False,
+            )
+
+            models = self._filter_dedicated_models(models, dedicated_response)
+
+        models.sort(key=lambda x: x.id.lower())
+
+        return models
+
+
+class AsyncModels(ModelsBase):
+    async def list(
+        self,
+        dedicated: bool = False,
+    ) -> List[ModelObject]:
+        """
+        Async method to return list of models on API
+
+        Args:
+            dedicated (bool, optional): If True, returns only dedicated models. Defaults to False.
+
+        Returns:
+            List[ModelObject]: List of model objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="models",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        models = [ModelObject(**model) for model in response.data]
+
+        if dedicated:
+            # Get dedicated models
+            dedicated_response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="GET",
+                    url="autoscale/models",
+                ),
+                stream=False,
+            )
+
+            models = self._filter_dedicated_models(models, dedicated_response)
+
+        models.sort(key=lambda x: x.id.lower())
+
+        return models
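The rewritten module keeps the previous behavior as the default and adds the dedicated filter, which issues a second request to autoscale/models and keeps only the models whose id appears there:

    from together import Together

    client = Together()

    # all models, now sorted case-insensitively by id
    all_models = client.models.list()

    # only models that can back a dedicated endpoint
    dedicated_models = client.models.list(dedicated=True)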
{together-1.4.5 → together-1.5.2}/src/together/types/__init__.py

@@ -34,11 +34,14 @@ from together.types.finetune import (
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
+    FinetuneCosineLRScheduler,
+    FinetuneCosineLRSchedulerArgs,
     FinetuneDownloadResult,
+    FinetuneLinearLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    FinetuneLRScheduler,
     FinetuneList,
     FinetuneListEvents,
-    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
     FinetuneTrainingLimits,
@@ -69,7 +72,10 @@ __all__ = [
     "FinetuneListEvents",
     "FinetuneDownloadResult",
     "FinetuneLRScheduler",
+    "FinetuneLinearLRScheduler",
     "FinetuneLinearLRSchedulerArgs",
+    "FinetuneCosineLRScheduler",
+    "FinetuneCosineLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
{together-1.4.5 → together-1.5.2}/src/together/types/finetune.py

@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 from enum import Enum
-from typing import List, Literal
+from typing import List, Literal, Union
 
-from pydantic import StrictBool, Field, validator, field_validator
+from pydantic import StrictBool, Field, validator, field_validator, ValidationInfo
 
 from together.types.abstract import BaseModel
 from together.types.common import (
@@ -176,7 +176,7 @@ class FinetuneRequest(BaseModel):
     # training learning rate
     learning_rate: float
     # learning rate scheduler type and args
-    lr_scheduler: FinetuneLRScheduler | None = None
+    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
     # max gradient norm
@@ -239,7 +239,7 @@ class FinetuneResponse(BaseModel):
     # training learning rate
     learning_rate: float | None = None
     # learning rate scheduler type and args
-    lr_scheduler: FinetuneLRScheduler | None = None
+    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
     # max gradient norm
@@ -345,13 +345,27 @@ class FinetuneTrainingLimits(BaseModel):
     lora_training: FinetuneLoraTrainingLimits | None = None
 
 
+class FinetuneLinearLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
+
+
+class FinetuneCosineLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
+    num_cycles: float | None = 0.5
+
+
 class FinetuneLRScheduler(BaseModel):
     lr_scheduler_type: str
-    lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None
 
 
-class FinetuneLinearLRSchedulerArgs(BaseModel):
-    min_lr_ratio: float | None = 0.0
+class FinetuneLinearLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal["linear"] = "linear"
+    lr_scheduler: FinetuneLinearLRSchedulerArgs | None = None
+
+
+class FinetuneCosineLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal["cosine"] = "cosine"
+    lr_scheduler: FinetuneCosineLRSchedulerArgs | None = None
 
 
 class FinetuneCheckpoint(BaseModel):
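The new scheduler models are re-exported from together.types (see the __init__.py hunks above). Constructing one directly, mirroring the call in createFinetuneRequest (the numbers are illustrative):

    from together.types import (
        FinetuneCosineLRScheduler,
        FinetuneCosineLRSchedulerArgs,
    )

    # lr_scheduler_type is pinned to "cosine" by the Literal default
    scheduler = FinetuneCosineLRScheduler(
        lr_scheduler_args=FinetuneCosineLRSchedulerArgs(
            min_lr_ratio=0.1,
            num_cycles=0.5,
        ),
    )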
together-1.4.5/src/together/resources/models.py (removed)

@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-from typing import List
-
-from together.abstract import api_requestor
-from together.together_response import TogetherResponse
-from together.types import (
-    ModelObject,
-    TogetherClient,
-    TogetherRequest,
-)
-
-
-class Models:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    def list(
-        self,
-    ) -> List[ModelObject]:
-        """
-        Method to return list of models on the API
-
-        Returns:
-            List[ModelObject]: List of model objects
-        """
-
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="GET",
-                url="models",
-            ),
-            stream=False,
-        )
-
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, list)
-
-        return [ModelObject(**model) for model in response.data]
-
-
-class AsyncModels:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    async def list(
-        self,
-    ) -> List[ModelObject]:
-        """
-        Async method to return list of models on API
-
-        Returns:
-            List[ModelObject]: List of model objects
-        """
-
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="GET",
-                url="models",
-            ),
-            stream=False,
-        )
-
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, list)
-
-        return [ModelObject(**model) for model in response.data]
|