together 1.5.13__py3-none-any.whl → 1.5.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/cli/api/finetune.py +30 -0
- together/resources/finetune.py +54 -1
- together/types/finetune.py +4 -0
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/METADATA +1 -1
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/RECORD +8 -8
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/LICENSE +0 -0
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/WHEEL +0 -0
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/entry_points.txt +0 -0
together/cli/api/finetune.py
CHANGED
@@ -142,6 +142,30 @@ def fine_tuning(ctx: click.Context) -> None:
     default=0.1,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
+@click.option(
+    "--dpo-normalize-logratios-by-length",
+    type=bool,
+    default=False,
+    help=(
+        "Whether to normalize logratios by sample length "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--rpo-alpha",
+    type=float,
+    default=0.0,
+    help=(
+        "RPO alpha parameter of DPO training to include NLL in the loss "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--simpo-gamma",
+    type=float,
+    default=0.1,
+    help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix",
     "-s",
@@ -206,6 +230,9 @@ def create(
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
     dpo_beta: float,
+    dpo_normalize_logratios_by_length: bool,
+    rpo_alpha: float,
+    simpo_gamma: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -239,6 +266,9 @@ def create(
         train_on_inputs=train_on_inputs,
         training_method=training_method,
         dpo_beta=dpo_beta,
+        dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+        rpo_alpha=rpo_alpha,
+        simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
     )
 
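The three new options only take effect when `--training-method` is `dpo`. A hypothetical invocation might look like the sketch below; `--training-file` and `--model` are pre-existing options that do not appear in this diff, and the file ID and model name are placeholders.

```bash
# Sketch only: file ID and model name are placeholders.
# --dpo-normalize-logratios-by-length is a bool-typed option (not a flag),
# so it takes an explicit true/false value.
together fine-tuning create \
  --training-file "file-0000-placeholder" \
  --model "<base-model-name>" \
  --training-method dpo \
  --dpo-beta 0.1 \
  --dpo-normalize-logratios-by-length true \
  --rpo-alpha 0.5
```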
together/resources/finetune.py
CHANGED
@@ -72,6 +72,9 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] | None = None,
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    dpo_normalize_logratios_by_length: bool = False,
+    rpo_alpha: float | None = None,
+    simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -182,6 +185,21 @@ def create_finetune_request(
 
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
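A small point about the form of the range checks above: `not rpo_alpha >= 0.0` is not the same predicate as `rpo_alpha < 0.0`, because it also fires for NaN. A standalone sketch (not part of the package) illustrating the difference:

```python
import math

# `not x >= 0.0` is True for negatives *and* for NaN, so a NaN value gets rejected.
# A plain `x < 0.0` check evaluates to False for NaN and would let it slip through.
for x in (0.5, -0.1, math.nan):
    print(x, "| not x >= 0.0:", not x >= 0.0, "| x < 0.0:", x < 0.0)
```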
@@ -204,7 +222,24 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
-
+        if simpo_gamma is not None and simpo_gamma > 0:
+            dpo_reference_free = True
+            dpo_normalize_logratios_by_length = True
+            rprint(
+                f"Parameter simpo_gamma was set to {simpo_gamma}. "
+                "SimPO training detected. Reference logits will not be used "
+                "and length normalization of log-probabilities will be enabled."
+            )
+        else:
+            dpo_reference_free = False
+
+        training_method_cls = TrainingMethodDPO(
+            dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
+        )
 
     finetune_request = FinetuneRequest(
         model=model,
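In practice the SimPO switch is driven entirely by `simpo_gamma`: any positive value makes the request reference-free and length-normalized, and the `rprint` call announces that to the user. A minimal sketch of exercising this through the client follows; the file ID and model name are placeholders, and `training_file`/`model` belong to the pre-existing `create()` signature that is only partially visible in this diff.

```python
from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

# Hypothetical job: a positive simpo_gamma triggers the SimPO branch above,
# forcing dpo_reference_free=True and length-normalized log-ratios.
job = client.fine_tuning.create(
    training_file="file-0000-placeholder",  # placeholder preference dataset ID
    model="<base-model-name>",              # placeholder base model name
    training_method="dpo",
    dpo_beta=0.1,
    simpo_gamma=0.3,
)
print(job.id)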
@@ -302,6 +337,9 @@ class FineTuning:
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -353,6 +391,9 @@ class FineTuning:
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -405,6 +446,9 @@ class FineTuning:
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
@@ -714,6 +758,9 @@ class AsyncFineTuning:
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -765,6 +812,9 @@ class AsyncFineTuning:
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -817,6 +867,9 @@ class AsyncFineTuning:
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
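The `AsyncFineTuning.create` changes mirror the synchronous client one-for-one, so the same parameters are available on the async path. A hedged sketch, with the same placeholders as above:

```python
import asyncio

from together import AsyncTogether


async def main() -> None:
    client = AsyncTogether()  # assumes TOGETHER_API_KEY is set in the environment
    job = await client.fine_tuning.create(
        training_file="file-0000-placeholder",  # placeholder preference dataset ID
        model="<base-model-name>",              # placeholder base model name
        training_method="dpo",
        dpo_beta=0.1,
        rpo_alpha=0.5,  # adds an NLL term to the DPO loss, per the docstring above
    )
    print(job.id)


asyncio.run(main())
```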
together/types/finetune.py
CHANGED
@@ -159,6 +159,10 @@ class TrainingMethodDPO(TrainingMethod):
 
     method: Literal["dpo"] = "dpo"
     dpo_beta: float | None = None
+    dpo_normalize_logratios_by_length: bool = False
+    dpo_reference_free: bool = False
+    rpo_alpha: float | None = None
+    simpo_gamma: float | None = None
 
 
 class FinetuneRequest(BaseModel):
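Since `TrainingMethodDPO` is a Pydantic model like the neighbouring types, the new fields default to `False`/`None` and serialize straight into the fine-tune request payload. A small sketch, assuming the class exposes the usual Pydantic v2 API:

```python
from together.types.finetune import TrainingMethodDPO

# Illustrative values only; the defaults match the diff above.
method = TrainingMethodDPO(
    dpo_beta=0.1,
    dpo_normalize_logratios_by_length=True,
    dpo_reference_free=True,
    simpo_gamma=0.3,
)
# The dump includes method="dpo" plus whichever DPO/SimPO fields were populated.
print(method.model_dump(exclude_none=True))
```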
{together-1.5.13.dist-info → together-1.5.14.dist-info}/RECORD
CHANGED
@@ -7,7 +7,7 @@ together/cli/api/chat.py,sha256=2PHRb-9T-lUEKhUJFtc7SxJv3shCVx40gq_8pzfsewM,9234
 together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5XMc,4263
 together/cli/api/endpoints.py,sha256=f6KafWZvRF6n_ThWdr3y9uhE6wPF37PcD45w_EtgXmY,13289
 together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
-together/cli/api/finetune.py,sha256=
+together/cli/api/finetune.py,sha256=zrjxpPSgqcZRhJA4A_QjXNhNUfEu24zw0Da3UfUlzrY,17063
 together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
 together/cli/api/models.py,sha256=CXw8B1hqNkadogi58GIXhLg_dTJnvTBaE7Kq1_xQ-10,1423
 together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
@@ -35,7 +35,7 @@ together/resources/completions.py,sha256=5Wa-ZjPCxRcam6CDe7KgGYlTA7yJZMmd5TrRgGC
 together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnKoxs,2675
 together/resources/endpoints.py,sha256=NNjp-wyzOotzlscGGrANhOHxQBjHTN8f5kTQTH_CLvE,17177
 together/resources/files.py,sha256=y3Ri6UtyAa7fjCJ8_fp26Y2hzzi6Aoo21JKkVgljFl8,5026
-together/resources/finetune.py,sha256=
+together/resources/finetune.py,sha256=1O8JIbtLDY32N6hL88jUQVDEGcXFnl9qJAEREFoEK5k,40407
 together/resources/images.py,sha256=LQUjKPaFxWTqOAPnyF1Pp7Rz4NLOYhmoKwshpYiprEM,4923
 together/resources/models.py,sha256=qgmAXv61Cq4oLxytenEZBywA8shldDHYxJ_EAu_4JWQ,3864
 together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
@@ -52,7 +52,7 @@ together/types/embeddings.py,sha256=J7grkYYn7xhqeKaBO2T-8XQRtHhkzYzymovtGdIUK5A,
 together/types/endpoints.py,sha256=EzNhHOoQ_D9fUdNQtxQPeSWiFzdFLqpNodN0YLmv_h0,4393
 together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
 together/types/files.py,sha256=i-Ke57p8Svb1MbMZxu-Fo2zxIc6j-mDO2TLGNwPpGu0,1981
-together/types/finetune.py,sha256=
+together/types/finetune.py,sha256=6_jXgVVp4OOQXkABh0HKBzGy47H3wYCG2QxtXbdYauw,11079
 together/types/images.py,sha256=xnC-FZGdZU30WSFTybfGneWxb-kj0ZGufJsgHtB8j0k,980
 together/types/models.py,sha256=nwQIZGHKZpX9I6mK8z56VW70YC6Ry6JGsVa0s99QVxc,1055
 together/types/rerank.py,sha256=qZfuXOn7MZ6ly8hpJ_MZ7OU_Bi1-cgYNSB20Wja8Qkk,1061
@@ -62,8 +62,8 @@ together/utils/api_helpers.py,sha256=2K0O6qeEQ2zVFvi5NBN5m2kjZJaS3-JfKFecQ7SmGaw
 together/utils/files.py,sha256=btWQawwXbNKfPmCtRyObZViG1Xx-IPz45PrAtMXvcy8,16741
 together/utils/tools.py,sha256=H2MTJhEqtBllaDvOyZehIO_IVNK3P17rSDeILtJIVag,2964
 together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
-together-1.5.
-together-1.5.
-together-1.5.
-together-1.5.
-together-1.5.
+together-1.5.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+together-1.5.14.dist-info/METADATA,sha256=5fJlYeJKCtS-wVbWPtI_CDWKVSpMDvF3t-DmC8qxZ2U,15497
+together-1.5.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+together-1.5.14.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+together-1.5.14.dist-info/RECORD,,
{together-1.5.13.dist-info → together-1.5.14.dist-info}/LICENSE
File without changes

{together-1.5.13.dist-info → together-1.5.14.dist-info}/WHEEL
File without changes

{together-1.5.13.dist-info → together-1.5.14.dist-info}/entry_points.txt
File without changes