together 1.5.13__py3-none-any.whl → 1.5.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/cli/api/finetune.py +30 -0
- together/resources/finetune.py +54 -1
- together/types/finetune.py +4 -0
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/METADATA +1 -1
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/RECORD +8 -8
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/LICENSE +0 -0
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/WHEEL +0 -0
- {together-1.5.13.dist-info → together-1.5.14.dist-info}/entry_points.txt +0 -0
together/cli/api/finetune.py
CHANGED
@@ -142,6 +142,30 @@ def fine_tuning(ctx: click.Context) -> None:
     default=0.1,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
+@click.option(
+    "--dpo-normalize-logratios-by-length",
+    type=bool,
+    default=False,
+    help=(
+        "Whether to normalize logratios by sample length "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--rpo-alpha",
+    type=float,
+    default=0.0,
+    help=(
+        "RPO alpha parameter of DPO training to include NLL in the loss "
+        "(only used when '--training-method' is 'dpo')"
+    ),
+)
+@click.option(
+    "--simpo-gamma",
+    type=float,
+    default=0.1,
+    help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix",
     "-s",
@@ -206,6 +230,9 @@ def create(
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
     dpo_beta: float,
+    dpo_normalize_logratios_by_length: bool,
+    rpo_alpha: float,
+    simpo_gamma: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -239,6 +266,9 @@ def create(
         train_on_inputs=train_on_inputs,
         training_method=training_method,
         dpo_beta=dpo_beta,
+        dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+        rpo_alpha=rpo_alpha,
+        simpo_gamma=simpo_gamma,
         from_checkpoint=from_checkpoint,
     )
 
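The three new options only take effect when `--training-method` is `dpo`. A hypothetical invocation might look like the sketch below; `--training-file` and `--model` are pre-existing options that do not appear in this diff, and the file ID and model name are placeholders.

```bash
# Sketch only: file ID and model name are placeholders.
# --dpo-normalize-logratios-by-length is a bool-typed option (not a flag),
# so it takes an explicit true/false value.
together fine-tuning create \
  --training-file "file-0000-placeholder" \
  --model "<base-model-name>" \
  --training-method dpo \
  --dpo-beta 0.1 \
  --dpo-normalize-logratios-by-length true \
  --rpo-alpha 0.5
```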
together/resources/finetune.py
CHANGED
@@ -72,6 +72,9 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] | None = None,
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    dpo_normalize_logratios_by_length: bool = False,
+    rpo_alpha: float | None = None,
+    simpo_gamma: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -182,6 +185,21 @@ def create_finetune_request(
 
     if dpo_beta is not None and training_method != "dpo":
         raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
 
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
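A small point about the form of the range checks above: `not rpo_alpha >= 0.0` is not the same predicate as `rpo_alpha < 0.0`, because it also fires for NaN. A standalone sketch (not part of the package) illustrating the difference:

```python
import math

# `not x >= 0.0` is True for negatives *and* for NaN, so a NaN value gets rejected.
# A plain `x < 0.0` check evaluates to False for NaN and would let it slip through.
for x in (0.5, -0.1, math.nan):
    print(x, "| not x >= 0.0:", not x >= 0.0, "| x < 0.0:", x < 0.0)
```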
@@ -204,7 +222,24 @@ def create_finetune_request(
     if training_method == "sft":
         training_method_cls = TrainingMethodSFT(train_on_inputs=train_on_inputs)
     elif training_method == "dpo":
-
+        if simpo_gamma is not None and simpo_gamma > 0:
+            dpo_reference_free = True
+            dpo_normalize_logratios_by_length = True
+            rprint(
+                f"Parameter simpo_gamma was set to {simpo_gamma}. "
+                "SimPO training detected. Reference logits will not be used "
+                "and length normalization of log-probabilities will be enabled."
+            )
+        else:
+            dpo_reference_free = False
+
+        training_method_cls = TrainingMethodDPO(
+            dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            dpo_reference_free=dpo_reference_free,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
+        )
 
     finetune_request = FinetuneRequest(
         model=model,
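In practice the SimPO switch is driven entirely by `simpo_gamma`: any positive value makes the request reference-free and length-normalized, and the `rprint` call announces that to the user. A minimal sketch of exercising this through the client follows; the file ID and model name are placeholders, and `training_file`/`model` belong to the pre-existing `create()` signature that is only partially visible in this diff.

```python
from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

# Hypothetical job: a positive simpo_gamma triggers the SimPO branch above,
# forcing dpo_reference_free=True and length-normalized log-ratios.
job = client.fine_tuning.create(
    training_file="file-0000-placeholder",  # placeholder preference dataset ID
    model="<base-model-name>",              # placeholder base model name
    training_method="dpo",
    dpo_beta=0.1,
    simpo_gamma=0.3,
)
print(job.id)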
@@ -302,6 +337,9 @@ class FineTuning:
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -353,6 +391,9 @@ class FineTuning:
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -405,6 +446,9 @@ class FineTuning:
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
@@ -714,6 +758,9 @@ class AsyncFineTuning:
         train_on_inputs: bool | Literal["auto"] | None = None,
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        dpo_normalize_logratios_by_length: bool = False,
+        rpo_alpha: float | None = None,
+        simpo_gamma: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -765,6 +812,9 @@ class AsyncFineTuning:
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            dpo_normalize_logratios_by_length (bool): Whether or not normalize logratios by sample length. Defaults to False,
+            rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
+            simpo_gamma: (float, optional): SimPO gamma parameter. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -817,6 +867,9 @@ class AsyncFineTuning:
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            dpo_normalize_logratios_by_length=dpo_normalize_logratios_by_length,
+            rpo_alpha=rpo_alpha,
+            simpo_gamma=simpo_gamma,
             from_checkpoint=from_checkpoint,
         )
 
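The `AsyncFineTuning.create` changes mirror the synchronous client one-for-one, so the same parameters are available on the async path. A hedged sketch, with the same placeholders as above:

```python
import asyncio

from together import AsyncTogether


async def main() -> None:
    client = AsyncTogether()  # assumes TOGETHER_API_KEY is set in the environment
    job = await client.fine_tuning.create(
        training_file="file-0000-placeholder",  # placeholder preference dataset ID
        model="<base-model-name>",              # placeholder base model name
        training_method="dpo",
        dpo_beta=0.1,
        rpo_alpha=0.5,  # adds an NLL term to the DPO loss, per the docstring above
    )
    print(job.id)


asyncio.run(main())
```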
together/types/finetune.py
CHANGED
@@ -159,6 +159,10 @@ class TrainingMethodDPO(TrainingMethod):
 
     method: Literal["dpo"] = "dpo"
     dpo_beta: float | None = None
+    dpo_normalize_logratios_by_length: bool = False
+    dpo_reference_free: bool = False
+    rpo_alpha: float | None = None
+    simpo_gamma: float | None = None
 
 
 class FinetuneRequest(BaseModel):
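Since `TrainingMethodDPO` is a Pydantic model like the neighbouring types, the new fields default to `False`/`None` and serialize straight into the fine-tune request payload. A small sketch, assuming the class exposes the usual Pydantic v2 API:

```python
from together.types.finetune import TrainingMethodDPO

# Illustrative values only; the defaults match the diff above.
method = TrainingMethodDPO(
    dpo_beta=0.1,
    dpo_normalize_logratios_by_length=True,
    dpo_reference_free=True,
    simpo_gamma=0.3,
)
# The dump includes method="dpo" plus whichever DPO/SimPO fields were populated.
print(method.model_dump(exclude_none=True))
```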
{together-1.5.13.dist-info → together-1.5.14.dist-info}/RECORD
CHANGED
@@ -7,7 +7,7 @@ together/cli/api/chat.py,sha256=2PHRb-9T-lUEKhUJFtc7SxJv3shCVx40gq_8pzfsewM,9234
 together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5XMc,4263
 together/cli/api/endpoints.py,sha256=f6KafWZvRF6n_ThWdr3y9uhE6wPF37PcD45w_EtgXmY,13289
 together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
-together/cli/api/finetune.py,sha256=
+together/cli/api/finetune.py,sha256=zrjxpPSgqcZRhJA4A_QjXNhNUfEu24zw0Da3UfUlzrY,17063
 together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
 together/cli/api/models.py,sha256=CXw8B1hqNkadogi58GIXhLg_dTJnvTBaE7Kq1_xQ-10,1423
 together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
@@ -35,7 +35,7 @@ together/resources/completions.py,sha256=5Wa-ZjPCxRcam6CDe7KgGYlTA7yJZMmd5TrRgGC
 together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnKoxs,2675
 together/resources/endpoints.py,sha256=NNjp-wyzOotzlscGGrANhOHxQBjHTN8f5kTQTH_CLvE,17177
 together/resources/files.py,sha256=y3Ri6UtyAa7fjCJ8_fp26Y2hzzi6Aoo21JKkVgljFl8,5026
-together/resources/finetune.py,sha256=
+together/resources/finetune.py,sha256=1O8JIbtLDY32N6hL88jUQVDEGcXFnl9qJAEREFoEK5k,40407
 together/resources/images.py,sha256=LQUjKPaFxWTqOAPnyF1Pp7Rz4NLOYhmoKwshpYiprEM,4923
 together/resources/models.py,sha256=qgmAXv61Cq4oLxytenEZBywA8shldDHYxJ_EAu_4JWQ,3864
 together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
@@ -52,7 +52,7 @@ together/types/embeddings.py,sha256=J7grkYYn7xhqeKaBO2T-8XQRtHhkzYzymovtGdIUK5A,
 together/types/endpoints.py,sha256=EzNhHOoQ_D9fUdNQtxQPeSWiFzdFLqpNodN0YLmv_h0,4393
 together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
 together/types/files.py,sha256=i-Ke57p8Svb1MbMZxu-Fo2zxIc6j-mDO2TLGNwPpGu0,1981
-together/types/finetune.py,sha256=
+together/types/finetune.py,sha256=6_jXgVVp4OOQXkABh0HKBzGy47H3wYCG2QxtXbdYauw,11079
 together/types/images.py,sha256=xnC-FZGdZU30WSFTybfGneWxb-kj0ZGufJsgHtB8j0k,980
 together/types/models.py,sha256=nwQIZGHKZpX9I6mK8z56VW70YC6Ry6JGsVa0s99QVxc,1055
 together/types/rerank.py,sha256=qZfuXOn7MZ6ly8hpJ_MZ7OU_Bi1-cgYNSB20Wja8Qkk,1061
@@ -62,8 +62,8 @@ together/utils/api_helpers.py,sha256=2K0O6qeEQ2zVFvi5NBN5m2kjZJaS3-JfKFecQ7SmGaw
 together/utils/files.py,sha256=btWQawwXbNKfPmCtRyObZViG1Xx-IPz45PrAtMXvcy8,16741
 together/utils/tools.py,sha256=H2MTJhEqtBllaDvOyZehIO_IVNK3P17rSDeILtJIVag,2964
 together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
-together-1.5.
-together-1.5.
-together-1.5.
-together-1.5.
-together-1.5.
+together-1.5.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+together-1.5.14.dist-info/METADATA,sha256=5fJlYeJKCtS-wVbWPtI_CDWKVSpMDvF3t-DmC8qxZ2U,15497
+together-1.5.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+together-1.5.14.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+together-1.5.14.dist-info/RECORD,,
{together-1.5.13.dist-info → together-1.5.14.dist-info}/LICENSE
File without changes

{together-1.5.13.dist-info → together-1.5.14.dist-info}/WHEEL
File without changes

{together-1.5.13.dist-info → together-1.5.14.dist-info}/entry_points.txt
File without changes