together 1.4.6.tar.gz → 1.5.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {together-1.4.6 → together-1.5.3}/PKG-INFO +3 -4
- {together-1.4.6 → together-1.5.3}/README.md +1 -1
- {together-1.4.6 → together-1.5.3}/pyproject.toml +2 -2
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/finetune.py +46 -9
- {together-1.4.6 → together-1.5.3}/src/together/resources/finetune.py +86 -22
- {together-1.4.6 → together-1.5.3}/src/together/types/__init__.py +7 -1
- {together-1.4.6 → together-1.5.3}/src/together/types/finetune.py +22 -8
- {together-1.4.6 → together-1.5.3}/LICENSE +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/abstract/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/abstract/api_requestor.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/chat.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/completions.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/endpoints.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/files.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/images.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/models.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/api/utils.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/cli/cli.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/client.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/constants.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/error.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/filemanager.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/base.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/complete.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/embeddings.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/files.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/finetune.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/images.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/legacy/models.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/audio/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/audio/speech.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/chat/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/chat/completions.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/completions.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/embeddings.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/endpoints.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/files.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/images.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/models.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/resources/rerank.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/together_response.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/abstract.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/audio_speech.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/chat_completions.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/common.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/completions.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/embeddings.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/endpoints.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/error.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/files.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/images.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/models.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/types/rerank.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/utils/__init__.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/utils/_log.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/utils/api_helpers.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/utils/files.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/utils/tools.py +0 -0
- {together-1.4.6 → together-1.5.3}/src/together/version.py +0 -0
{together-1.4.6 → together-1.5.3}/PKG-INFO

@@ -1,15 +1,14 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.4.6
+Version: 1.5.3
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
 Author-email: support@together.ai
-Requires-Python: >=3.
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -45,7 +44,7 @@ Description-Content-Type: text/markdown
 [](https://discord.com/invite/9Rk6sSeWEG)
 [](https://twitter.com/togethercompute)

-The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.
+The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.



{together-1.4.6 → together-1.5.3}/README.md

@@ -10,7 +10,7 @@
 [](https://discord.com/invite/9Rk6sSeWEG)
 [](https://twitter.com/togethercompute)

-The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.
+The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.



{together-1.4.6 → together-1.5.3}/pyproject.toml

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

 [tool.poetry]
 name = "together"
-version = "1.4.6"
+version = "1.5.3"
 authors = [
     "Together AI <support@together.ai>"
 ]
@@ -28,7 +28,7 @@ repository = "https://github.com/togethercomputer/together-python"
 homepage = "https://github.com/togethercomputer/together-python"

 [tool.poetry.dependencies]
-python = "^3.
+python = "^3.10"
 typer = ">=0.9,<0.16"
 requests = "^2.31.0"
 rich = "^13.8.1"
{together-1.4.6 → together-1.5.3}/src/together/cli/api/finetune.py

@@ -58,30 +58,50 @@ def fine_tuning(ctx: click.Context) -> None:
 @fine_tuning.command()
 @click.pass_context
 @click.option(
-    "--training-file",
+    "--training-file",
+    "-t",
+    type=str,
+    required=True,
+    help="Training file ID from Files API",
+)
+@click.option("--model", "-m", type=str, help="Base model name")
+@click.option(
+    "--n-epochs", "-ne", type=int, default=1, help="Number of epochs to train for"
 )
-@click.option("--model", type=str, required=True, help="Base model name")
-@click.option("--n-epochs", type=int, default=1, help="Number of epochs to train for")
 @click.option(
     "--validation-file", type=str, default="", help="Validation file ID from Files API"
 )
 @click.option("--n-evals", type=int, default=0, help="Number of evaluation loops")
 @click.option(
-    "--n-checkpoints", type=int, default=1, help="Number of checkpoints to save"
+    "--n-checkpoints", "-c", type=int, default=1, help="Number of checkpoints to save"
+)
+@click.option(
+    "--batch-size", "-b", type=INT_WITH_MAX, default="max", help="Train batch size"
+)
+@click.option("--learning-rate", "-lr", type=float, default=1e-5, help="Learning rate")
+@click.option(
+    "--lr-scheduler-type",
+    type=click.Choice(["linear", "cosine"]),
+    default="linear",
+    help="Learning rate scheduler type",
 )
-@click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
-@click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
 @click.option(
     "--min-lr-ratio",
     type=float,
     default=0.0,
     help="The ratio of the final learning rate to the peak learning rate",
 )
+@click.option(
+    "--scheduler-num-cycles",
+    type=float,
+    default=0.5,
+    help="Number or fraction of cycles for the cosine learning rate scheduler.",
+)
 @click.option(
     "--warmup-ratio",
     type=float,
     default=0.0,
-    help="Warmup ratio for learning rate scheduler.",
+    help="Warmup ratio for the learning rate scheduler.",
 )
 @click.option(
     "--max-grad-norm",
@@ -123,7 +143,11 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
-    "--suffix",
+    "--suffix",
+    "-s",
+    type=str,
+    default=None,
+    help="Suffix for the fine-tuned model name",
 )
 @click.option("--wandb-api-key", type=str, default=None, help="Wandb API key")
 @click.option("--wandb-base-url", type=str, default=None, help="Wandb base URL")
@@ -162,7 +186,9 @@ def create(
     n_checkpoints: int,
     batch_size: int | Literal["max"],
     learning_rate: float,
+    lr_scheduler_type: Literal["linear", "cosine"],
     min_lr_ratio: float,
+    scheduler_num_cycles: float,
     warmup_ratio: float,
     max_grad_norm: float,
     weight_decay: float,
@@ -194,7 +220,9 @@ def create(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        lr_scheduler_type=lr_scheduler_type,
         min_lr_ratio=min_lr_ratio,
+        scheduler_num_cycles=scheduler_num_cycles,
         warmup_ratio=warmup_ratio,
         max_grad_norm=max_grad_norm,
         weight_decay=weight_decay,
@@ -214,8 +242,15 @@ def create(
         from_checkpoint=from_checkpoint,
     )

+    if model is None and from_checkpoint is None:
+        raise click.BadParameter("You must specify either a model or a checkpoint")
+
+    model_name = model
+    if from_checkpoint is not None:
+        model_name = from_checkpoint.split(":")[0]
+
     model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits(
-        model=
+        model=model_name
     )

     if lora:
@@ -411,6 +446,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
 @click.argument("fine_tune_id", type=str, required=True)
 @click.option(
     "--output_dir",
+    "-o",
     type=click.Path(exists=True, file_okay=False, resolve_path=True),
     required=False,
     default=None,
@@ -418,6 +454,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
 )
 @click.option(
     "--checkpoint-step",
+    "-s",
     type=int,
     required=False,
     default=None,
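The CLI hunks above add short flags and two new scheduler options to the `create` command. A hedged usage sketch, assuming the command group is exposed as `together fine-tuning` as in earlier releases; the file ID and model name are placeholders, while the flags are the ones defined in this diff:

```shell
# Placeholder file ID and model name; --lr-scheduler-type and
# --scheduler-num-cycles are the options introduced in this release.
together fine-tuning create \
  --training-file file-xxxxxxxx \
  --model meta-llama/Meta-Llama-3.1-8B-Instruct-Reference \
  --n-epochs 3 \
  --lr-scheduler-type cosine \
  --scheduler-num-cycles 0.5 \
  --suffix my-experiment
```

With the default `--lr-scheduler-type linear`, `--scheduler-num-cycles` is not used, mirroring the scheduler branch added in `createFinetuneRequest` below.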
{together-1.4.6 → together-1.5.3}/src/together/resources/finetune.py

@@ -22,7 +22,10 @@ from together.types import (
     TogetherRequest,
     TrainingType,
     FinetuneLRScheduler,
+    FinetuneLinearLRScheduler,
+    FinetuneCosineLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    FinetuneCosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
@@ -50,14 +53,16 @@ AVAILABLE_TRAINING_METHODS = {
 def createFinetuneRequest(
     model_limits: FinetuneTrainingLimits,
     training_file: str,
-    model: str,
+    model: str | None = None,
     n_epochs: int = 1,
     validation_file: str | None = "",
     n_evals: int | None = 0,
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
+    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
     min_lr_ratio: float = 0.0,
+    scheduler_num_cycles: float = 0.5,
     warmup_ratio: float = 0.0,
     max_grad_norm: float = 1.0,
     weight_decay: float = 0.0,
@@ -77,6 +82,14 @@ def createFinetuneRequest(
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:

+    if model is not None and from_checkpoint is not None:
+        raise ValueError(
+            "You must specify either a model or a checkpoint to start a job from, not both"
+        )
+
+    if model is None and from_checkpoint is None:
+        raise ValueError("You must specify either a model or a checkpoint")
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -86,6 +99,8 @@ def createFinetuneRequest(
         warmup_ratio = 0.0

     training_type: TrainingType = FullTrainingType()
+    max_batch_size: int = 0
+    min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
             raise ValueError("LoRA adapters are not supported for the selected model.")
@@ -98,18 +113,26 @@ def createFinetuneRequest(
             lora_trainable_modules=lora_trainable_modules,
         )

-
-
-
-            else model_limits.lora_training.max_batch_size
-        )
+        max_batch_size = model_limits.lora_training.max_batch_size
+        min_batch_size = model_limits.lora_training.min_batch_size
+
     else:
         if model_limits.full_training is None:
             raise ValueError("Full training is not supported for the selected model.")
-
-
-
-
+
+        max_batch_size = model_limits.full_training.max_batch_size
+        min_batch_size = model_limits.full_training.min_batch_size
+
+    batch_size = batch_size if batch_size != "max" else max_batch_size
+
+    if batch_size > max_batch_size:
+        raise ValueError(
+            "Requested batch size is higher that the maximum allowed value."
+        )
+
+    if batch_size < min_batch_size:
+        raise ValueError(
+            "Requested batch size is lower that the minimum allowed value."
         )

     if warmup_ratio > 1 or warmup_ratio < 0:
@@ -129,10 +152,22 @@ def createFinetuneRequest(
             f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
         )

-
-
-
-
+    # Default to generic lr scheduler
+    lrScheduler: FinetuneLRScheduler = FinetuneLRScheduler(lr_scheduler_type="linear")
+
+    if lr_scheduler_type == "cosine":
+        if scheduler_num_cycles <= 0.0:
+            raise ValueError("Number of cycles should be greater than 0")
+
+        lrScheduler = FinetuneCosineLRScheduler(
+            lr_scheduler_args=FinetuneCosineLRSchedulerArgs(
+                min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
+            ),
+        )
+    else:
+        lrScheduler = FinetuneLinearLRScheduler(
+            lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+        )

     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
     if training_method == "dpo":
@@ -237,14 +272,16 @@ class FineTuning:
         self,
         *,
         training_file: str,
-        model: str,
+        model: str | None = None,
         n_epochs: int = 1,
         validation_file: str | None = "",
         n_evals: int | None = 0,
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
+        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
         min_lr_ratio: float = 0.0,
+        scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -270,7 +307,7 @@ class FineTuning:

         Args:
             training_file (str): File-ID of a file uploaded to the Together API
-            model (str): Name of the base model to run fine-tune job on
+            model (str, optional): Name of the base model to run fine-tune job on
             n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
             validation file (str, optional): File ID of a file uploaded to the Together API for validation.
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -279,9 +316,11 @@ class FineTuning:
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
-
+            scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+            warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
             max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -325,7 +364,16 @@ class FineTuning:
         )

         if model_limits is None:
-
+            # mypy doesn't understand that model or from_checkpoint is not None
+            if model is not None:
+                model_name = model
+            elif from_checkpoint is not None:
+                model_name = from_checkpoint.split(":")[0]
+            else:
+                # this branch is unreachable, but mypy doesn't know that
+                pass
+            model_limits = self.get_model_limits(model=model_name)
+
         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
             training_file=training_file,
@@ -336,7 +384,9 @@ class FineTuning:
             n_checkpoints=n_checkpoints,
             batch_size=batch_size,
             learning_rate=learning_rate,
+            lr_scheduler_type=lr_scheduler_type,
             min_lr_ratio=min_lr_ratio,
+            scheduler_num_cycles=scheduler_num_cycles,
             warmup_ratio=warmup_ratio,
             max_grad_norm=max_grad_norm,
             weight_decay=weight_decay,
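The hunks above thread the new `lr_scheduler_type` and `scheduler_num_cycles` parameters from `FineTuning.create` into `createFinetuneRequest`. A minimal sketch of opting into the cosine schedule from the Python client; the training file ID and model name are placeholders, not values taken from this diff:

```python
from together import Together

client = Together()  # picks up TOGETHER_API_KEY from the environment

# Placeholder training file ID and base model name for illustration.
job = client.fine_tuning.create(
    training_file="file-xxxxxxxx",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    n_epochs=3,
    lr_scheduler_type="cosine",  # new parameter; defaults to "linear"
    scheduler_num_cycles=0.5,    # only consulted by the cosine scheduler
    min_lr_ratio=0.1,
)
print(job.id, job.status)
```

Passing `scheduler_num_cycles <= 0` together with the cosine scheduler raises a `ValueError` in `createFinetuneRequest`, per the validation added above.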
{together-1.4.6 → together-1.5.3}/src/together/resources/finetune.py (continued)

@@ -610,14 +660,16 @@ class AsyncFineTuning:
         self,
         *,
         training_file: str,
-        model: str,
+        model: str | None = None,
         n_epochs: int = 1,
         validation_file: str | None = "",
         n_evals: int | None = 0,
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
+        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
         min_lr_ratio: float = 0.0,
+        scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -643,7 +695,7 @@ class AsyncFineTuning:

         Args:
             training_file (str): File-ID of a file uploaded to the Together API
-            model (str): Name of the base model to run fine-tune job on
+            model (str, optional): Name of the base model to run fine-tune job on
             n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
             validation file (str, optional): File ID of a file uploaded to the Together API for validation.
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -652,9 +704,11 @@ class AsyncFineTuning:
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
-
+            scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+            warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
             max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -698,7 +752,15 @@ class AsyncFineTuning:
         )

         if model_limits is None:
-
+            # mypy doesn't understand that model or from_checkpoint is not None
+            if model is not None:
+                model_name = model
+            elif from_checkpoint is not None:
+                model_name = from_checkpoint.split(":")[0]
+            else:
+                # this branch is unreachable, but mypy doesn't know that
+                pass
+            model_limits = await self.get_model_limits(model=model_name)

         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
@@ -710,7 +772,9 @@ class AsyncFineTuning:
             n_checkpoints=n_checkpoints,
             batch_size=batch_size,
             learning_rate=learning_rate,
+            lr_scheduler_type=lr_scheduler_type,
             min_lr_ratio=min_lr_ratio,
+            scheduler_num_cycles=scheduler_num_cycles,
             warmup_ratio=warmup_ratio,
             max_grad_norm=max_grad_norm,
             weight_decay=weight_decay,
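`AsyncFineTuning.create` receives the same changes, and on both clients `model` is now optional: a job can instead be started from `from_checkpoint`, with the base model name recovered via `from_checkpoint.split(":")[0]` when model limits need to be looked up. A sketch with a hypothetical checkpoint ID and placeholder file ID:

```python
import asyncio

from together import AsyncTogether


async def main() -> None:
    client = AsyncTogether()  # picks up TOGETHER_API_KEY from the environment

    # "ft-..." is a placeholder for an existing fine-tuning job/checkpoint ID.
    job = await client.fine_tuning.create(
        training_file="file-xxxxxxxx",
        from_checkpoint="ft-00000000-0000-0000-0000-000000000000",
    )
    print(job.id, job.status)


asyncio.run(main())
```

Supplying both `model` and `from_checkpoint` now raises `ValueError("You must specify either a model or a checkpoint to start a job from, not both")`, per the check added in `createFinetuneRequest`.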
{together-1.4.6 → together-1.5.3}/src/together/types/__init__.py

@@ -34,11 +34,14 @@ from together.types.finetune import (
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
+    FinetuneCosineLRScheduler,
+    FinetuneCosineLRSchedulerArgs,
     FinetuneDownloadResult,
+    FinetuneLinearLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    FinetuneLRScheduler,
     FinetuneList,
     FinetuneListEvents,
-    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
     FinetuneTrainingLimits,
@@ -69,7 +72,10 @@ __all__ = [
     "FinetuneListEvents",
     "FinetuneDownloadResult",
     "FinetuneLRScheduler",
+    "FinetuneLinearLRScheduler",
     "FinetuneLinearLRSchedulerArgs",
+    "FinetuneCosineLRScheduler",
+    "FinetuneCosineLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
{together-1.4.6 → together-1.5.3}/src/together/types/finetune.py

@@ -1,9 +1,9 @@
 from __future__ import annotations

 from enum import Enum
-from typing import List, Literal
+from typing import List, Literal, Union

-from pydantic import StrictBool, Field, validator, field_validator
+from pydantic import StrictBool, Field, validator, field_validator, ValidationInfo

 from together.types.abstract import BaseModel
 from together.types.common import (
@@ -170,13 +170,13 @@ class FinetuneRequest(BaseModel):
     # validation file id
     validation_file: str | None = None
     # base model string
-    model: str
+    model: str | None = None
     # number of epochs to train for
     n_epochs: int
     # training learning rate
     learning_rate: float
     # learning rate scheduler type and args
-    lr_scheduler:
+    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
     # max gradient norm
@@ -239,7 +239,7 @@ class FinetuneResponse(BaseModel):
     # training learning rate
     learning_rate: float | None = None
     # learning rate scheduler type and args
-    lr_scheduler:
+    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
     # max gradient norm
@@ -345,13 +345,27 @@ class FinetuneTrainingLimits(BaseModel):
     lora_training: FinetuneLoraTrainingLimits | None = None


+class FinetuneLinearLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
+
+
+class FinetuneCosineLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
+    num_cycles: float | None = 0.5
+
+
 class FinetuneLRScheduler(BaseModel):
     lr_scheduler_type: str
-    lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None


-class
-
+class FinetuneLinearLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal["linear"] = "linear"
+    lr_scheduler: FinetuneLinearLRSchedulerArgs | None = None
+
+
+class FinetuneCosineLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal["cosine"] = "cosine"
+    lr_scheduler: FinetuneCosineLRSchedulerArgs | None = None


 class FinetuneCheckpoint(BaseModel):
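In the new type definitions, `FinetuneLinearLRScheduler` and `FinetuneCosineLRScheduler` pin `lr_scheduler_type` with a `Literal` and carry their arguments in an optional `lr_scheduler` field; both are re-exported from `together.types`. A small sketch built only from the fields declared in the hunk above (the values are illustrative):

```python
from together.types import (
    FinetuneCosineLRScheduler,
    FinetuneCosineLRSchedulerArgs,
)

# Illustrative values; the client-side check requires num_cycles > 0
# when the cosine schedule is selected.
scheduler = FinetuneCosineLRScheduler(
    lr_scheduler=FinetuneCosineLRSchedulerArgs(min_lr_ratio=0.1, num_cycles=0.5),
)
print(scheduler.lr_scheduler_type)  # "cosine"
```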