together 1.5.13__py3-none-any.whl → 1.5.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
together/cli/api/finetune.py

@@ -142,6 +142,30 @@ def fine_tuning(ctx: click.Context) -> None:
     default=0.1,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
+@click.option(
+    "--dpo-normalize-logratios-by-length",
+    type=bool,
+    default=False,
+    help=(
+        "Whether to normalize logratios by sample length "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--rpo-alpha",
+    type=float,
+    default=0.0,
+    help=(
+        "RPO alpha parameter of DPO training to include NLL in the loss "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--simpo-gamma",
+    type=float,
+    default=0.1,
+    help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix",
     "-s",
@@ -206,6 +230,9 @@ def create(
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
     dpo_beta: float,
+    dpo_normalize_logratios_by_length: bool,
+    rpo_alpha: float,
+    simpo_gamma: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -239,6 +266,9 @@ def create(
         train_on_inputs=train_on_inputs,
         training_method=training_method,
         dpo_beta=dpo_beta,
+        dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+        rpo_alpha=rpo_alpha,
+        simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
     )
 
together/resources/finetune.py

@@ -72,6 +72,9 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] | None = None,
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    dpo_normalize_logratios_by_length: bool = False,
+    rpo_alpha: float | None = None,
+    simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -182,6 +185,21 @@ def create_finetune_request(
 
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
@@ -204,7 +222,24 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
-        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta)
+        if simpo_gamma is not None and simpo_gamma > 0:
+            dpo_reference_free = True
+            dpo_normalize_logratios_by_length = True
+            rprint(
+                f"Parameter simpo_gamma was set to {simpo_gamma}. "
+                "SimPO training detected. Reference logits will not be used "
+                "and length normalization of log-probabilities will be enabled."
+            )
+        else:
+            dpo_reference_free = False
+
+        training_method_cls = TrainingMethodDPO(
+            dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
+        )
 
     finetune_request = FinetuneRequest(
         model=model,
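
Illustrative note (not part of the diff): in the DPO branch above, a positive simpo_gamma flips the job into SimPO mode, forcing reference-free training and enabling length normalization regardless of the flag the caller passed. A minimal standalone sketch of that decision; the helper name is illustrative and does not exist in the package:

    def resolve_dpo_mode(
        simpo_gamma: float | None, normalize_logratios_by_length: bool
    ) -> tuple[bool, bool]:
        """Return (dpo_reference_free, dpo_normalize_logratios_by_length)."""
        if simpo_gamma is not None and simpo_gamma > 0:
            # SimPO: drop the reference model and normalize log-probs by length.
            return True, True
        return False, normalize_logratios_by_length

    assert resolve_dpo_mode(0.3, False) == (True, True)
    assert resolve_dpo_mode(None, True) == (False, True)
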
@@ -302,6 +337,9 @@ class FineTuning:
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -353,6 +391,9 @@ class FineTuning:
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not to normalize logratios by sample length. Defaults to False.
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -405,6 +446,9 @@ class FineTuning:
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
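Illustrative note (not part of the diff): a hedged sketch of how the new keyword arguments surface through the synchronous client. The file ID and model name are placeholders, and the call assumes the usual Together() entry point forwarding these arguments exactly as shown in the hunks above:

    from together import Together

    client = Together()  # expects TOGETHER_API_KEY in the environment

    job = client.fine_tuning.create(
        training_file="file-xxxxxxxxxxxx",  # placeholder preference-dataset ID
        model="your-base-model",            # placeholder model name
        training_method="dpo",
        dpo_beta=0.1,
        rpo_alpha=0.5,    # adds an NLL term to the DPO loss
        simpo_gamma=0.3,  # > 0 switches the job to reference-free SimPO
    )
    print(job.id)
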
@@ -714,6 +758,9 @@ class AsyncFineTuning:
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -765,6 +812,9 @@ class AsyncFineTuning:
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not to normalize logratios by sample length. Defaults to False.
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -817,6 +867,9 @@ class AsyncFineTuning:
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
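Illustrative note (not part of the diff): the async client mirrors the same signature; a brief sketch assuming the AsyncTogether entry point, again with placeholder IDs:

    import asyncio
    from together import AsyncTogether

    async def main() -> None:
        client = AsyncTogether()
        job = await client.fine_tuning.create(
            training_file="file-xxxxxxxxxxxx",  # placeholder dataset ID
            model="your-base-model",            # placeholder model name
            training_method="dpo",
            dpo_normalize_logratios_by_length=True,
            rpo_alpha=0.5,
        )
        print(job.id)

    asyncio.run(main())
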
together/types/finetune.py

@@ -159,6 +159,10 @@ class TrainingMethodDPO(TrainingMethod):
 
     method: Literal["dpo"] = "dpo"
     dpo_beta: float | None = None
+    dpo_normalize_logratios_by_length: bool = False
+    dpo_reference_free: bool = False
+    rpo_alpha: float | None = None
+    simpo_gamma: float | None = None
 
 
 class FinetuneRequest(BaseModel):
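
Illustrative note (not part of the diff): since the new fields live on the TrainingMethodDPO model, they serialize straight into the fine-tune request body; a small sketch, assuming the package's pydantic v2 models and using illustrative values:

    from together.types.finetune import TrainingMethodDPO

    method = TrainingMethodDPO(
        dpo_beta=0.1,
        dpo_normalize_logratios_by_length=True,
        dpo_reference_free=True,
        simpo_gamma=0.3,
    )
    # Expected to include roughly:
    # {'method': 'dpo', 'dpo_beta': 0.1, 'dpo_normalize_logratios_by_length': True,
    #  'dpo_reference_free': True, 'rpo_alpha': None, 'simpo_gamma': 0.3}
    print(method.model_dump())
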
together-1.5.14.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.5.13
+Version: 1.5.14
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
together-1.5.14.dist-info/RECORD

@@ -7,7 +7,7 @@ together/cli/api/chat.py,sha256=2PHRb-9T-lUEKhUJFtc7SxJv3shCVx40gq_8pzfsewM,9234
 together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5XMc,4263
 together/cli/api/endpoints.py,sha256=f6KafWZvRF6n_ThWdr3y9uhE6wPF37PcD45w_EtgXmY,13289
 together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
-together/cli/api/finetune.py,sha256=mM8GF6xP-NM-eOQfn8eXBCLop77OesLVaATAfKm_HMo,16238
+together/cli/api/finetune.py,sha256=zrjxpPSgqcZRhJA4A_QjXNhNUfEu24zw0Da3UfUlzrY,17063
 together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
 together/cli/api/models.py,sha256=CXw8B1hqNkadogi58GIXhLg_dTJnvTBaE7Kq1_xQ-10,1423
 together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
@@ -35,7 +35,7 @@ together/resources/completions.py,sha256=5Wa-ZjPCxRcam6CDe7KgGYlTA7yJZMmd5TrRgGC
 together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnKoxs,2675
 together/resources/endpoints.py,sha256=NNjp-wyzOotzlscGGrANhOHxQBjHTN8f5kTQTH_CLvE,17177
 together/resources/files.py,sha256=y3Ri6UtyAa7fjCJ8_fp26Y2hzzi6Aoo21JKkVgljFl8,5026
-together/resources/finetune.py,sha256=M-nvOZMnL7ZyTayiKN9Vos8D1uYNj-ENmrOA2bkPF8A,37617
+together/resources/finetune.py,sha256=1O8JIbtLDY32N6hL88jUQVDEGcXFnl9qJAEREFoEK5k,40407
 together/resources/images.py,sha256=LQUjKPaFxWTqOAPnyF1Pp7Rz4NLOYhmoKwshpYiprEM,4923
 together/resources/models.py,sha256=qgmAXv61Cq4oLxytenEZBywA8shldDHYxJ_EAu_4JWQ,3864
 together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
@@ -52,7 +52,7 @@ together/types/embeddings.py,sha256=J7grkYYn7xhqeKaBO2T-8XQRtHhkzYzymovtGdIUK5A,
 together/types/endpoints.py,sha256=EzNhHOoQ_D9fUdNQtxQPeSWiFzdFLqpNodN0YLmv_h0,4393
 together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
 together/types/files.py,sha256=i-Ke57p8Svb1MbMZxu-Fo2zxIc6j-mDO2TLGNwPpGu0,1981
-together/types/finetune.py,sha256=Utdcm3kL_cDfBS3zjXwyHsuP2qFFjCQiQZOsPD-WlpE,10918
+together/types/finetune.py,sha256=6_jXgVVp4OOQXkABh0HKBzGy47H3wYCG2QxtXbdYauw,11079
 together/types/images.py,sha256=xnC-FZGdZU30WSFTybfGneWxb-kj0ZGufJsgHtB8j0k,980
 together/types/models.py,sha256=nwQIZGHKZpX9I6mK8z56VW70YC6Ry6JGsVa0s99QVxc,1055
 together/types/rerank.py,sha256=qZfuXOn7MZ6ly8hpJ_MZ7OU_Bi1-cgYNSB20Wja8Qkk,1061
@@ -62,8 +62,8 @@ together/utils/api_helpers.py,sha256=2K0O6qeEQ2zVFvi5NBN5m2kjZJaS3-JfKFecQ7SmGaw
 together/utils/files.py,sha256=btWQawwXbNKfPmCtRyObZViG1Xx-IPz45PrAtMXvcy8,16741
 together/utils/tools.py,sha256=H2MTJhEqtBllaDvOyZehIO_IVNK3P17rSDeILtJIVag,2964
 together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
-together-1.5.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-together-1.5.13.dist-info/METADATA,sha256=pH3dlk_0LIhPs8Z6HW5xnGFPLnxTT7t_-WA3WP4tp1s,15497
-together-1.5.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-together-1.5.13.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
-together-1.5.13.dist-info/RECORD,,
+together-1.5.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+together-1.5.14.dist-info/METADATA,sha256=5fJlYeJKCtS-wVbWPtI_CDWKVSpMDvF3t-DmC8qxZ2U,15497
+together-1.5.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+together-1.5.14.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+together-1.5.14.dist-info/RECORD,,