together 1.3.4__py3-none-any.whl → 1.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,12 +65,30 @@ def fine_tuning(ctx: click.Context) -> None:
 )
 @click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
 @click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
+@click.option(
+    "--min-lr-ratio",
+    type=float,
+    default=0.0,
+    help="The ratio of the final learning rate to the peak learning rate",
+)
 @click.option(
     "--warmup-ratio",
     type=float,
     default=0.0,
     help="Warmup ratio for learning rate scheduler.",
 )
+@click.option(
+    "--max-grad-norm",
+    type=float,
+    default=1.0,
+    help="Max gradient norm to be used for gradient clipping. Set to 0 to disable.",
+)
+@click.option(
+    "--weight-decay",
+    type=float,
+    default=0.0,
+    help="Weight decay",
+)
 @click.option(
     "--lora/--no-lora",
     type=bool,
@@ -115,7 +133,10 @@ def create(
     n_checkpoints: int,
     batch_size: int | Literal["max"],
     learning_rate: float,
+    min_lr_ratio: float,
     warmup_ratio: float,
+    max_grad_norm: float,
+    weight_decay: float,
     lora: bool,
     lora_r: int,
     lora_dropout: float,
@@ -138,7 +159,10 @@ def create(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        min_lr_ratio=min_lr_ratio,
         warmup_ratio=warmup_ratio,
+        max_grad_norm=max_grad_norm,
+        weight_decay=weight_decay,
         lora=lora,
         lora_r=lora_r,
         lora_dropout=lora_dropout,
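
The three hunks above are from together/cli/api/finetune.py (its hash changes in the RECORD hunks at the end of this diff): the fine-tuning create command gains --min-lr-ratio, --max-grad-norm, and --weight-decay options and passes them straight through to the client. A minimal sketch for checking that the new options are registered, using click's built-in test runner; it assumes the fine_tuning group can be imported and invoked on its own, which this diff does not itself show:

from click.testing import CliRunner

from together.cli.api.finetune import fine_tuning  # CLI module changed in this diff

# Render the help text of the `create` sub-command; the new
# --min-lr-ratio, --max-grad-norm, and --weight-decay options should
# show up in the listing. No API key or network access is needed for --help.
runner = CliRunner()
result = runner.invoke(fine_tuning, ["create", "--help"])
print(result.output)
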
@@ -20,6 +20,8 @@ from together.types import (
     TogetherClient,
     TogetherRequest,
     TrainingType,
+    FinetuneLRScheduler,
+    FinetuneLinearLRSchedulerArgs,
 )
 from together.types.finetune import DownloadCheckpointType
 from together.utils import log_warn_once, normalize_key
@@ -35,7 +37,10 @@ def createFinetuneRequest(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    warmup_ratio: float | None = 0.0,
+    min_lr_ratio: float = 0.0,
+    warmup_ratio: float = 0.0,
+    max_grad_norm: float = 1.0,
+    weight_decay: float = 0.0,
     lora: bool = False,
     lora_r: int | None = None,
     lora_dropout: float | None = 0,
@@ -83,6 +88,20 @@ def createFinetuneRequest(
     if warmup_ratio > 1 or warmup_ratio < 0:
         raise ValueError("Warmup ratio should be between 0 and 1")
 
+    if min_lr_ratio is not None and (min_lr_ratio > 1 or min_lr_ratio < 0):
+        raise ValueError("Min learning rate ratio should be between 0 and 1")
+
+    if max_grad_norm < 0:
+        raise ValueError("Max gradient norm should be non-negative")
+
+    if weight_decay is not None and (weight_decay < 0):
+        raise ValueError("Weight decay should be non-negative")
+
+    lrScheduler = FinetuneLRScheduler(
+        lr_scheduler_type="linear",
+        lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+    )
+
     finetune_request = FinetuneRequest(
         model=model,
         training_file=training_file,
@@ -92,7 +111,10 @@ def createFinetuneRequest(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        lr_scheduler=lrScheduler,
         warmup_ratio=warmup_ratio,
+        max_grad_norm=max_grad_norm,
+        weight_decay=weight_decay,
         training_type=training_type,
         suffix=suffix,
         wandb_key=wandb_api_key,
@@ -117,7 +139,10 @@ class FineTuning:
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        warmup_ratio: float | None = 0.0,
+        min_lr_ratio: float = 0.0,
+        warmup_ratio: float = 0.0,
+        max_grad_norm: float = 1.0,
+        weight_decay: float = 0.0,
         lora: bool = False,
         lora_r: int | None = None,
         lora_dropout: float | None = 0,
@@ -143,7 +168,11 @@ class FineTuning:
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
+                the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
+            weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
             lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
@@ -185,7 +214,10 @@ class FineTuning:
            n_checkpoints=n_checkpoints,
            batch_size=batch_size,
            learning_rate=learning_rate,
+           min_lr_ratio=min_lr_ratio,
            warmup_ratio=warmup_ratio,
+           max_grad_norm=max_grad_norm,
+           weight_decay=weight_decay,
            lora=lora,
            lora_r=lora_r,
            lora_dropout=lora_dropout,
@@ -436,7 +468,10 @@ class AsyncFineTuning:
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        warmup_ratio: float | None = 0.0,
+        min_lr_ratio: float = 0.0,
+        warmup_ratio: float = 0.0,
+        max_grad_norm: float = 1.0,
+        weight_decay: float = 0.0,
         lora: bool = False,
         lora_r: int | None = None,
         lora_dropout: float | None = 0,
@@ -462,7 +497,11 @@ class AsyncFineTuning:
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
+                the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
+            weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
             lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
@@ -504,7 +543,10 @@ class AsyncFineTuning:
            n_checkpoints=n_checkpoints,
            batch_size=batch_size,
            learning_rate=learning_rate,
+           min_lr_ratio=min_lr_ratio,
            warmup_ratio=warmup_ratio,
+           max_grad_norm=max_grad_norm,
+           weight_decay=weight_decay,
            lora=lora,
            lora_r=lora_r,
            lora_dropout=lora_dropout,
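
The hunks above are from together/resources/finetune.py: createFinetuneRequest, FineTuning.create, and AsyncFineTuning.create now accept min_lr_ratio, max_grad_norm, and weight_decay alongside the existing warmup_ratio, validate their ranges, and wrap min_lr_ratio into a linear LR scheduler attached to the request. A minimal usage sketch against the 1.3.5 client; it assumes a valid TOGETHER_API_KEY in the environment, and the training-file ID and model name below are placeholders, not values taken from this diff:

from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

job = client.fine_tuning.create(
    training_file="file-xxxxxxxxxxxx",  # placeholder ID of an already-uploaded training file
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder model name
    learning_rate=1e-5,
    min_lr_ratio=0.1,   # new in 1.3.5: final LR = 10% of the peak LR
    warmup_ratio=0.05,
    max_grad_norm=1.0,  # new in 1.3.5: gradient-clipping threshold; 0 disables clipping
    weight_decay=0.01,  # new in 1.3.5
)
print(job.id)

Out-of-range values are rejected client-side by the new checks; for example min_lr_ratio=1.5 raises ValueError("Min learning rate ratio should be between 0 and 1") before any request is sent.
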
@@ -30,6 +30,8 @@ from together.types.finetune import (
     LoRATrainingType,
     TrainingType,
     FinetuneTrainingLimits,
+    FinetuneLRScheduler,
+    FinetuneLinearLRSchedulerArgs,
 )
 from together.types.images import (
     ImageRequest,
@@ -57,6 +59,8 @@ __all__ = [
     "FinetuneList",
     "FinetuneListEvents",
     "FinetuneDownloadResult",
+    "FinetuneLRScheduler",
+    "FinetuneLinearLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
@@ -150,8 +150,14 @@ class FinetuneRequest(BaseModel):
     n_epochs: int
     # training learning rate
     learning_rate: float
+    # learning rate scheduler type and args
+    lr_scheduler: FinetuneLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
+    # max gradient norm
+    max_grad_norm: float
+    # weight decay
+    weight_decay: float
     # number of checkpoints to save
     n_checkpoints: int | None = None
     # number of evaluation loops to run
@@ -193,8 +199,14 @@ class FinetuneResponse(BaseModel):
     batch_size: int | None = None
     # training learning rate
     learning_rate: float | None = None
+    # learning rate scheduler type and args
+    lr_scheduler: FinetuneLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
+    # max gradient norm
+    max_grad_norm: float | None = None
+    # weight decay
+    weight_decay: float | None = None
     # number of steps between evals
     eval_steps: int | None = None
     # training type
@@ -287,3 +299,12 @@ class FinetuneTrainingLimits(BaseModel):
     min_learning_rate: float
     full_training: FinetuneFullTrainingLimits | None = None
     lora_training: FinetuneLoraTrainingLimits | None = None
+
+
+class FinetuneLRScheduler(BaseModel):
+    lr_scheduler_type: str
+    lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None
+
+
+class FinetuneLinearLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
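
The three hunks above are from together/types/finetune.py: FinetuneRequest and FinetuneResponse gain lr_scheduler, max_grad_norm, and weight_decay fields, backed by two new pydantic models for the scheduler. A minimal sketch that builds the scheduler the same way createFinetuneRequest now does; the 0.1 ratio is an arbitrary example value:

from together.types import FinetuneLinearLRSchedulerArgs, FinetuneLRScheduler

# A linear schedule that decays the learning rate to min_lr_ratio * peak LR
# by the end of training; this mirrors what createFinetuneRequest constructs.
scheduler = FinetuneLRScheduler(
    lr_scheduler_type="linear",
    lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=0.1),
)
print(scheduler)

Note that the SDK itself only ever constructs the "linear" scheduler type; lr_scheduler_type is a plain str, so which other values, if any, are accepted is determined server-side rather than by this diff.
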
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: together
-Version: 1.3.4
+Version: 1.3.5
 Summary: Python client for Together's Cloud Platform!
 Home-page: https://github.com/togethercomputer/together-python
 License: Apache-2.0
@@ -6,7 +6,7 @@ together/cli/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 together/cli/api/chat.py,sha256=2PHRb-9T-lUEKhUJFtc7SxJv3shCVx40gq_8pzfsewM,9234
 together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5XMc,4263
 together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
-together/cli/api/finetune.py,sha256=vl-0cTubZER7wKEPFTFfhe8_Ry_Squ4PypPzR0VHClg,12175
+together/cli/api/finetune.py,sha256=78dJs_hF_gDWQjUT5R3v518GmNQnnB0Qt8CyU68e5jY,12760
 together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
 together/cli/api/models.py,sha256=xWEzu8ZpxM_Pz9KEjRPRVuv_v22RayYZ4QcgiezT5tE,1126
 together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
@@ -29,12 +29,12 @@ together/resources/chat/completions.py,sha256=jYiNZsWa8RyEacL0VgxWj1egJ857oU4nxI
 together/resources/completions.py,sha256=5Wa-ZjPCxRcam6CDe7KgGYlTA7yJZMmd5TrRgGCL_ug,11726
 together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnKoxs,2675
 together/resources/files.py,sha256=bnPbaF25e4InBRPvHwXHXT-oSX1Z1sZRsnQW5wq82U4,4990
-together/resources/finetune.py,sha256=K_jLNeApduKQXtz9rN7V_tG_IZdfwGrmf_zYgJNX9aA,23609
+together/resources/finetune.py,sha256=UcbPAZ0b_WR3ks754n5fPzDjraNQHSkulaKGmQQZ2Zs,25516
 together/resources/images.py,sha256=LQUjKPaFxWTqOAPnyF1Pp7Rz4NLOYhmoKwshpYiprEM,4923
 together/resources/models.py,sha256=2dtHhXAqTDOOpwSbYLzWcKTC0-m2Szlb7LDYvp7Jr4w,1786
 together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
 together/together_response.py,sha256=MhczUCPem93cjX-A1TOAUrRj3sO-o3SLcEcTsZgVzQI,1319
-together/types/__init__.py,sha256=oHZCMC0H3j1ykf7ZRgxIU0QBA534EMpfKqRaa9SdgOo,1739
+together/types/__init__.py,sha256=jEnnepzUeeYgCNTQIi4EWKaOEsZKYp0vEqzYmP8bK5o,1863
 together/types/abstract.py,sha256=1lFQI_3WjsR_t1128AeKW0aTk6EiM6Gh1J3ZuyLLPao,642
 together/types/chat_completions.py,sha256=d24F3VfT7uVnmaEk7Fn-O7qkGUg_AQQzR7vPwlXVDXw,4882
 together/types/common.py,sha256=4ZeIgqGioqhIC-nNxY90czNPp-kAqboMulw6-1z6ShM,1511
@@ -42,7 +42,7 @@ together/types/completions.py,sha256=o3FR5ixsTUj-a3pmOUzbSQg-hESVhpqrC9UD__VCqr4
 together/types/embeddings.py,sha256=J7grkYYn7xhqeKaBO2T-8XQRtHhkzYzymovtGdIUK5A,751
 together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
 together/types/files.py,sha256=-rEUfsV6f2vZB9NrFxT4_933ubsDIUNkPB-3OlOFk4A,1954
-together/types/finetune.py,sha256=1-EZ-HB1wA2fYX2Gt8u-nVPy6UgVyNQwh4aYzvo8eic,8079
+together/types/finetune.py,sha256=17IM5A__GnT6hgMClMz0vESohWI_qh5Eeq3iR9w1ODg,8704
 together/types/images.py,sha256=xnC-FZGdZU30WSFTybfGneWxb-kj0ZGufJsgHtB8j0k,980
 together/types/models.py,sha256=K9Om3cCFexy7qzRSEXUj7gpCy1CVb1hHx7MGG-hvTLw,1035
 together/types/rerank.py,sha256=qZfuXOn7MZ6ly8hpJ_MZ7OU_Bi1-cgYNSB20Wja8Qkk,1061
@@ -52,8 +52,8 @@ together/utils/api_helpers.py,sha256=RSF7SRhbjHzroMOSWAXscflByM1r1ta_1SpxkAT22iE
 together/utils/files.py,sha256=rBCwez0i0bcJIgQQsgd-ROgcakR5NfSmUreYPQoE5Nk,13005
 together/utils/tools.py,sha256=3-lXWP3cBCzOVSZg9tr5zOT1jaVeKAKVWxO2fcXZTh8,1788
 together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
-together-1.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-together-1.3.4.dist-info/METADATA,sha256=4z5uVKF141cdQiwBWGVlpBFvkMAOHb5RDExHDh9UtFg,11829
-together-1.3.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-together-1.3.4.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
-together-1.3.4.dist-info/RECORD,,
+together-1.3.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+together-1.3.5.dist-info/METADATA,sha256=4naWLEoh8icjBGlIVvJSXlNjtwFGdgKpWi-hVEXDo-E,11829
+together-1.3.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+together-1.3.5.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+together-1.3.5.dist-info/RECORD,,