together 1.3.4.tar.gz → 1.3.5.tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- {together-1.3.4 → together-1.3.5}/PKG-INFO +1 -1
- {together-1.3.4 → together-1.3.5}/pyproject.toml +1 -1
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/finetune.py +24 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/finetune.py +45 -3
- {together-1.3.4 → together-1.3.5}/src/together/types/__init__.py +4 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/finetune.py +21 -0
- {together-1.3.4 → together-1.3.5}/LICENSE +0 -0
- {together-1.3.4 → together-1.3.5}/README.md +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/abstract/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/abstract/api_requestor.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/chat.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/completions.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/files.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/images.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/models.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/api/utils.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/cli/cli.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/client.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/constants.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/error.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/filemanager.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/base.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/complete.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/embeddings.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/files.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/finetune.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/images.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/legacy/models.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/chat/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/chat/completions.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/completions.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/embeddings.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/files.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/images.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/models.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/resources/rerank.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/together_response.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/abstract.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/chat_completions.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/common.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/completions.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/embeddings.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/error.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/files.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/images.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/models.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/types/rerank.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/utils/__init__.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/utils/_log.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/utils/api_helpers.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/utils/files.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/utils/tools.py +0 -0
- {together-1.3.4 → together-1.3.5}/src/together/version.py +0 -0
{together-1.3.4 → together-1.3.5}/src/together/cli/api/finetune.py

```diff
@@ -65,12 +65,30 @@ def fine_tuning(ctx: click.Context) -> None:
 )
 @click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
 @click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
+@click.option(
+    "--min-lr-ratio",
+    type=float,
+    default=0.0,
+    help="The ratio of the final learning rate to the peak learning rate",
+)
 @click.option(
     "--warmup-ratio",
     type=float,
     default=0.0,
     help="Warmup ratio for learning rate scheduler.",
 )
+@click.option(
+    "--max-grad-norm",
+    type=float,
+    default=1.0,
+    help="Max gradient norm to be used for gradient clipping. Set to 0 to disable.",
+)
+@click.option(
+    "--weight-decay",
+    type=float,
+    default=0.0,
+    help="Weight decay",
+)
 @click.option(
     "--lora/--no-lora",
     type=bool,
@@ -115,7 +133,10 @@ def create(
     n_checkpoints: int,
     batch_size: int | Literal["max"],
     learning_rate: float,
+    min_lr_ratio: float,
     warmup_ratio: float,
+    max_grad_norm: float,
+    weight_decay: float,
     lora: bool,
     lora_r: int,
     lora_dropout: float,
@@ -138,7 +159,10 @@ def create(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        min_lr_ratio=min_lr_ratio,
         warmup_ratio=warmup_ratio,
+        max_grad_norm=max_grad_norm,
+        weight_decay=weight_decay,
         lora=lora,
         lora_r=lora_r,
         lora_dropout=lora_dropout,
```
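For orientation, a hedged sketch of how the new flags would be passed on the command line; the `--training-file`/`--model` options and their values are placeholders assumed here, while the last three flags are the ones added above.

```python
# Hedged example, not part of this diff: exercising the new CLI flags.
# "--training-file"/"--model" and their values are assumed placeholders.
import subprocess

subprocess.run(
    [
        "together", "fine-tuning", "create",
        "--training-file", "file-xxxxxxxx",  # placeholder file ID
        "--model", "your-base-model",        # placeholder model name
        "--min-lr-ratio", "0.1",             # new in 1.3.5
        "--max-grad-norm", "1.0",            # new in 1.3.5; 0 disables clipping
        "--weight-decay", "0.01",            # new in 1.3.5
    ],
    check=True,
)
```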
{together-1.3.4 → together-1.3.5}/src/together/resources/finetune.py

```diff
@@ -20,6 +20,8 @@ from together.types import (
     TogetherClient,
     TogetherRequest,
     TrainingType,
+    FinetuneLRScheduler,
+    FinetuneLinearLRSchedulerArgs,
 )
 from together.types.finetune import DownloadCheckpointType
 from together.utils import log_warn_once, normalize_key
@@ -35,7 +37,10 @@ def createFinetuneRequest(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    warmup_ratio: float = 0.0,
+    min_lr_ratio: float = 0.0,
+    warmup_ratio: float = 0.0,
+    max_grad_norm: float = 1.0,
+    weight_decay: float = 0.0,
     lora: bool = False,
     lora_r: int | None = None,
     lora_dropout: float | None = 0,
@@ -83,6 +88,20 @@ def createFinetuneRequest(
     if warmup_ratio > 1 or warmup_ratio < 0:
         raise ValueError("Warmup ratio should be between 0 and 1")
 
+    if min_lr_ratio is not None and (min_lr_ratio > 1 or min_lr_ratio < 0):
+        raise ValueError("Min learning rate ratio should be between 0 and 1")
+
+    if max_grad_norm < 0:
+        raise ValueError("Max gradient norm should be non-negative")
+
+    if weight_decay is not None and (weight_decay < 0):
+        raise ValueError("Weight decay should be non-negative")
+
+    lrScheduler = FinetuneLRScheduler(
+        lr_scheduler_type="linear",
+        lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+    )
+
     finetune_request = FinetuneRequest(
         model=model,
         training_file=training_file,
@@ -92,7 +111,10 @@ def createFinetuneRequest(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        lr_scheduler=lrScheduler,
         warmup_ratio=warmup_ratio,
+        max_grad_norm=max_grad_norm,
+        weight_decay=weight_decay,
         training_type=training_type,
         suffix=suffix,
         wandb_key=wandb_api_key,
@@ -117,7 +139,10 @@ class FineTuning:
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        warmup_ratio: float = 0.0,
+        min_lr_ratio: float = 0.0,
+        warmup_ratio: float = 0.0,
+        max_grad_norm: float = 1.0,
+        weight_decay: float = 0.0,
         lora: bool = False,
         lora_r: int | None = None,
         lora_dropout: float | None = 0,
@@ -143,7 +168,11 @@ class FineTuning:
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
+                the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
+            weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
             lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
@@ -185,7 +214,10 @@ class FineTuning:
            n_checkpoints=n_checkpoints,
            batch_size=batch_size,
            learning_rate=learning_rate,
+           min_lr_ratio=min_lr_ratio,
            warmup_ratio=warmup_ratio,
+           max_grad_norm=max_grad_norm,
+           weight_decay=weight_decay,
            lora=lora,
            lora_r=lora_r,
            lora_dropout=lora_dropout,
@@ -436,7 +468,10 @@ class AsyncFineTuning:
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        warmup_ratio: float = 0.0,
+        min_lr_ratio: float = 0.0,
+        warmup_ratio: float = 0.0,
+        max_grad_norm: float = 1.0,
+        weight_decay: float = 0.0,
         lora: bool = False,
         lora_r: int | None = None,
         lora_dropout: float | None = 0,
@@ -462,7 +497,11 @@ class AsyncFineTuning:
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
+                the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
+            weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
             lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
@@ -504,7 +543,10 @@ class AsyncFineTuning:
            n_checkpoints=n_checkpoints,
            batch_size=batch_size,
            learning_rate=learning_rate,
+           min_lr_ratio=min_lr_ratio,
            warmup_ratio=warmup_ratio,
+           max_grad_norm=max_grad_norm,
+           weight_decay=weight_decay,
            lora=lora,
            lora_r=lora_r,
            lora_dropout=lora_dropout,
```
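A minimal usage sketch of the new keyword arguments on `FineTuning.create`; it assumes the client exposes the resource as `client.fine_tuning`, that `TOGETHER_API_KEY` is set, and uses placeholder file/model identifiers. Internally, `createFinetuneRequest` wraps `min_lr_ratio` in a `FinetuneLRScheduler` with `lr_scheduler_type="linear"`.

```python
# Hedged sketch, not taken from the package: passing the hyperparameters
# added in 1.3.5 through the fine-tuning client.
from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment
job = client.fine_tuning.create(
    training_file="file-xxxxxxxx",  # placeholder file ID
    model="your-base-model",        # placeholder model name
    min_lr_ratio=0.1,               # final LR is 10% of the peak learning rate
    warmup_ratio=0.03,
    max_grad_norm=1.0,              # 0 disables gradient clipping
    weight_decay=0.01,
)
print(job.id)  # assumes the response carries a job ID
```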
{together-1.3.4 → together-1.3.5}/src/together/types/__init__.py

```diff
@@ -30,6 +30,8 @@ from together.types.finetune import (
     LoRATrainingType,
     TrainingType,
     FinetuneTrainingLimits,
+    FinetuneLRScheduler,
+    FinetuneLinearLRSchedulerArgs,
 )
 from together.types.images import (
     ImageRequest,
@@ -57,6 +59,8 @@ __all__ = [
     "FinetuneList",
     "FinetuneListEvents",
     "FinetuneDownloadResult",
+    "FinetuneLRScheduler",
+    "FinetuneLinearLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
```
{together-1.3.4 → together-1.3.5}/src/together/types/finetune.py

```diff
@@ -150,8 +150,14 @@ class FinetuneRequest(BaseModel):
     n_epochs: int
     # training learning rate
     learning_rate: float
+    # learning rate scheduler type and args
+    lr_scheduler: FinetuneLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
+    # max gradient norm
+    max_grad_norm: float
+    # weight decay
+    weight_decay: float
     # number of checkpoints to save
     n_checkpoints: int | None = None
     # number of evaluation loops to run
@@ -193,8 +199,14 @@ class FinetuneResponse(BaseModel):
     batch_size: int | None = None
     # training learning rate
     learning_rate: float | None = None
+    # learning rate scheduler type and args
+    lr_scheduler: FinetuneLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
+    # max gradient norm
+    max_grad_norm: float | None = None
+    # weight decay
+    weight_decay: float | None = None
     # number of steps between evals
     eval_steps: int | None = None
     # training type
@@ -287,3 +299,12 @@ class FinetuneTrainingLimits(BaseModel):
     min_learning_rate: float
     full_training: FinetuneFullTrainingLimits | None = None
     lora_training: FinetuneLoraTrainingLimits | None = None
+
+
+class FinetuneLRScheduler(BaseModel):
+    lr_scheduler_type: str
+    lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None
+
+
+class FinetuneLinearLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
```
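Finally, a small round-trip sketch of the new scheduler models defined above, assuming the package's `BaseModel` is Pydantic v2 (so `model_dump`/`model_validate` are available):

```python
# Hedged sketch, not part of the package: constructing and round-tripping
# the new scheduler models. Assumes Pydantic v2 model methods.
from together.types.finetune import (
    FinetuneLinearLRSchedulerArgs,
    FinetuneLRScheduler,
)

scheduler = FinetuneLRScheduler(
    lr_scheduler_type="linear",
    lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=0.1),
)
payload = scheduler.model_dump()
# {'lr_scheduler_type': 'linear', 'lr_scheduler_args': {'min_lr_ratio': 0.1}}
assert FinetuneLRScheduler.model_validate(payload) == scheduler
```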