together 1.3.4__py3-none-any.whl → 1.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,12 +65,30 @@ def fine_tuning(ctx: click.Context) -> None:
 )
 @click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
 @click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
+@click.option(
+    "--min-lr-ratio",
+    type=float,
+    default=0.0,
+    help="The ratio of the final learning rate to the peak learning rate",
+)
 @click.option(
     "--warmup-ratio",
     type=float,
     default=0.0,
     help="Warmup ratio for learning rate scheduler.",
 )
+@click.option(
+    "--max-grad-norm",
+    type=float,
+    default=1.0,
+    help="Max gradient norm to be used for gradient clipping. Set to 0 to disable.",
+)
+@click.option(
+    "--weight-decay",
+    type=float,
+    default=0.0,
+    help="Weight decay",
+)
 @click.option(
     "--lora/--no-lora",
     type=bool,
@@ -115,7 +133,10 @@ def create(
     n_checkpoints: int,
     batch_size: int | Literal["max"],
     learning_rate: float,
+    min_lr_ratio: float,
     warmup_ratio: float,
+    max_grad_norm: float,
+    weight_decay: float,
     lora: bool,
     lora_r: int,
     lora_dropout: float,
@@ -138,7 +159,10 @@ def create(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        min_lr_ratio=min_lr_ratio,
         warmup_ratio=warmup_ratio,
+        max_grad_norm=max_grad_norm,
+        weight_decay=weight_decay,
         lora=lora,
         lora_r=lora_r,
         lora_dropout=lora_dropout,
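
The three hunks above are from together/cli/api/finetune.py (its hash changes in the RECORD hunks at the end of this diff): the fine-tuning create command gains --min-lr-ratio, --max-grad-norm, and --weight-decay options and passes them straight through to the client. A minimal sketch for checking that the new options are registered, using click's built-in test runner; it assumes the fine_tuning group can be imported and invoked on its own, which this diff does not itself show:

from click.testing import CliRunner

from together.cli.api.finetune import fine_tuning  # CLI module changed in this diff

# Render the help text of the `create` sub-command; the new
# --min-lr-ratio, --max-grad-norm, and --weight-decay options should
# show up in the listing. No API key or network access is needed for --help.
runner = CliRunner()
result = runner.invoke(fine_tuning, ["create", "--help"])
print(result.output)
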
@@ -20,6 +20,8 @@ from together.types import (
     TogetherClient,
     TogetherRequest,
     TrainingType,
+    FinetuneLRScheduler,
+    FinetuneLinearLRSchedulerArgs,
 )
 from together.types.finetune import DownloadCheckpointType
 from together.utils import log_warn_once, normalize_key
@@ -35,7 +37,10 @@ def createFinetuneRequest(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    warmup_ratio: float | None = 0.0,
+    min_lr_ratio: float = 0.0,
+    warmup_ratio: float = 0.0,
+    max_grad_norm: float = 1.0,
+    weight_decay: float = 0.0,
     lora: bool = False,
     lora_r: int | None = None,
     lora_dropout: float | None = 0,
@@ -83,6 +88,20 @@ def createFinetuneRequest(
     if warmup_ratio > 1 or warmup_ratio < 0:
         raise ValueError("Warmup ratio should be between 0 and 1")
 
+    if min_lr_ratio is not None and (min_lr_ratio > 1 or min_lr_ratio < 0):
+        raise ValueError("Min learning rate ratio should be between 0 and 1")
+
+    if max_grad_norm < 0:
+        raise ValueError("Max gradient norm should be non-negative")
+
+    if weight_decay is not None and (weight_decay < 0):
+        raise ValueError("Weight decay should be non-negative")
+
+    lrScheduler = FinetuneLRScheduler(
+        lr_scheduler_type="linear",
+        lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+    )
+
     finetune_request = FinetuneRequest(
         model=model,
         training_file=training_file,
@@ -92,7 +111,10 @@ def createFinetuneRequest(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        lr_scheduler=lrScheduler,
         warmup_ratio=warmup_ratio,
+        max_grad_norm=max_grad_norm,
+        weight_decay=weight_decay,
         training_type=training_type,
         suffix=suffix,
         wandb_key=wandb_api_key,
@@ -117,7 +139,10 @@ class FineTuning:
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        warmup_ratio: float | None = 0.0,
+        min_lr_ratio: float = 0.0,
+        warmup_ratio: float = 0.0,
+        max_grad_norm: float = 1.0,
+        weight_decay: float = 0.0,
         lora: bool = False,
         lora_r: int | None = None,
         lora_dropout: float | None = 0,
@@ -143,7 +168,11 @@ class FineTuning:
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
+                the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
+            weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
             lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
@@ -185,7 +214,10 @@ class FineTuning:
            n_checkpoints=n_checkpoints,
            batch_size=batch_size,
            learning_rate=learning_rate,
+           min_lr_ratio=min_lr_ratio,
            warmup_ratio=warmup_ratio,
+           max_grad_norm=max_grad_norm,
+           weight_decay=weight_decay,
            lora=lora,
            lora_r=lora_r,
            lora_dropout=lora_dropout,
@@ -436,7 +468,10 @@ class AsyncFineTuning:
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        warmup_ratio: float | None = 0.0,
+        min_lr_ratio: float = 0.0,
+        warmup_ratio: float = 0.0,
+        max_grad_norm: float = 1.0,
+        weight_decay: float = 0.0,
         lora: bool = False,
         lora_r: int | None = None,
         lora_dropout: float | None = 0,
@@ -462,7 +497,11 @@ class AsyncFineTuning:
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
+                the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
+            weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
             lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
@@ -504,7 +543,10 @@ class AsyncFineTuning:
            n_checkpoints=n_checkpoints,
            batch_size=batch_size,
            learning_rate=learning_rate,
+           min_lr_ratio=min_lr_ratio,
            warmup_ratio=warmup_ratio,
+           max_grad_norm=max_grad_norm,
+           weight_decay=weight_decay,
            lora=lora,
            lora_r=lora_r,
            lora_dropout=lora_dropout,
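
The hunks above are from together/resources/finetune.py: createFinetuneRequest, FineTuning.create, and AsyncFineTuning.create now accept min_lr_ratio, max_grad_norm, and weight_decay alongside the existing warmup_ratio, validate their ranges, and wrap min_lr_ratio into a linear LR scheduler attached to the request. A minimal usage sketch against the 1.3.5 client; it assumes a valid TOGETHER_API_KEY in the environment, and the training-file ID and model name below are placeholders, not values taken from this diff:

from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

job = client.fine_tuning.create(
    training_file="file-xxxxxxxxxxxx",  # placeholder ID of an already-uploaded training file
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder model name
    learning_rate=1e-5,
    min_lr_ratio=0.1,   # new in 1.3.5: final LR = 10% of the peak LR
    warmup_ratio=0.05,
    max_grad_norm=1.0,  # new in 1.3.5: gradient-clipping threshold; 0 disables clipping
    weight_decay=0.01,  # new in 1.3.5
)
print(job.id)

Out-of-range values are rejected client-side by the new checks; for example min_lr_ratio=1.5 raises ValueError("Min learning rate ratio should be between 0 and 1") before any request is sent.
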
@@ -30,6 +30,8 @@ from together.types.finetune import (
     LoRATrainingType,
     TrainingType,
     FinetuneTrainingLimits,
+    FinetuneLRScheduler,
+    FinetuneLinearLRSchedulerArgs,
 )
 from together.types.images import (
     ImageRequest,
@@ -57,6 +59,8 @@ __all__ = [
     "FinetuneList",
     "FinetuneListEvents",
     "FinetuneDownloadResult",
+    "FinetuneLRScheduler",
+    "FinetuneLinearLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
@@ -150,8 +150,14 @@ class FinetuneRequest(BaseModel):
     n_epochs: int
     # training learning rate
     learning_rate: float
+    # learning rate scheduler type and args
+    lr_scheduler: FinetuneLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
+    # max gradient norm
+    max_grad_norm: float
+    # weight decay
+    weight_decay: float
     # number of checkpoints to save
     n_checkpoints: int | None = None
     # number of evaluation loops to run
@@ -193,8 +199,14 @@ class FinetuneResponse(BaseModel):
     batch_size: int | None = None
     # training learning rate
     learning_rate: float | None = None
+    # learning rate scheduler type and args
+    lr_scheduler: FinetuneLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
+    # max gradient norm
+    max_grad_norm: float | None = None
+    # weight decay
+    weight_decay: float | None = None
     # number of steps between evals
     eval_steps: int | None = None
     # training type
@@ -287,3 +299,12 @@ class FinetuneTrainingLimits(BaseModel):
     min_learning_rate: float
     full_training: FinetuneFullTrainingLimits | None = None
     lora_training: FinetuneLoraTrainingLimits | None = None
+
+
+class FinetuneLRScheduler(BaseModel):
+    lr_scheduler_type: str
+    lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None
+
+
+class FinetuneLinearLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
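
The three hunks above are from together/types/finetune.py: FinetuneRequest and FinetuneResponse gain lr_scheduler, max_grad_norm, and weight_decay fields, backed by two new pydantic models for the scheduler. A minimal sketch that builds the scheduler the same way createFinetuneRequest now does; the 0.1 ratio is an arbitrary example value:

from together.types import FinetuneLinearLRSchedulerArgs, FinetuneLRScheduler

# A linear schedule that decays the learning rate to min_lr_ratio * peak LR
# by the end of training; this mirrors what createFinetuneRequest constructs.
scheduler = FinetuneLRScheduler(
    lr_scheduler_type="linear",
    lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=0.1),
)
print(scheduler)

Note that the SDK itself only ever constructs the "linear" scheduler type; lr_scheduler_type is a plain str, so which other values, if any, are accepted is determined server-side rather than by this diff.
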
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: together
-Version: 1.3.4
+Version: 1.3.5
 Summary: Python client for Together's Cloud Platform!
 Home-page: https://github.com/togethercomputer/together-python
 License: Apache-2.0
@@ -6,7 +6,7 @@ together/cli/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 together/cli/api/chat.py,sha256=2PHRb-9T-lUEKhUJFtc7SxJv3shCVx40gq_8pzfsewM,9234
 together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5XMc,4263
 together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
-together/cli/api/finetune.py,sha256=vl-0cTubZER7wKEPFTFfhe8_Ry_Squ4PypPzR0VHClg,12175
+together/cli/api/finetune.py,sha256=78dJs_hF_gDWQjUT5R3v518GmNQnnB0Qt8CyU68e5jY,12760
 together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
 together/cli/api/models.py,sha256=xWEzu8ZpxM_Pz9KEjRPRVuv_v22RayYZ4QcgiezT5tE,1126
 together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
@@ -29,12 +29,12 @@ together/resources/chat/completions.py,sha256=jYiNZsWa8RyEacL0VgxWj1egJ857oU4nxI
 together/resources/completions.py,sha256=5Wa-ZjPCxRcam6CDe7KgGYlTA7yJZMmd5TrRgGCL_ug,11726
 together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnKoxs,2675
 together/resources/files.py,sha256=bnPbaF25e4InBRPvHwXHXT-oSX1Z1sZRsnQW5wq82U4,4990
-together/resources/finetune.py,sha256=K_jLNeApduKQXtz9rN7V_tG_IZdfwGrmf_zYgJNX9aA,23609
+together/resources/finetune.py,sha256=UcbPAZ0b_WR3ks754n5fPzDjraNQHSkulaKGmQQZ2Zs,25516
 together/resources/images.py,sha256=LQUjKPaFxWTqOAPnyF1Pp7Rz4NLOYhmoKwshpYiprEM,4923
 together/resources/models.py,sha256=2dtHhXAqTDOOpwSbYLzWcKTC0-m2Szlb7LDYvp7Jr4w,1786
 together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
 together/together_response.py,sha256=MhczUCPem93cjX-A1TOAUrRj3sO-o3SLcEcTsZgVzQI,1319
-together/types/__init__.py,sha256=oHZCMC0H3j1ykf7ZRgxIU0QBA534EMpfKqRaa9SdgOo,1739
+together/types/__init__.py,sha256=jEnnepzUeeYgCNTQIi4EWKaOEsZKYp0vEqzYmP8bK5o,1863
 together/types/abstract.py,sha256=1lFQI_3WjsR_t1128AeKW0aTk6EiM6Gh1J3ZuyLLPao,642
 together/types/chat_completions.py,sha256=d24F3VfT7uVnmaEk7Fn-O7qkGUg_AQQzR7vPwlXVDXw,4882
 together/types/common.py,sha256=4ZeIgqGioqhIC-nNxY90czNPp-kAqboMulw6-1z6ShM,1511
@@ -42,7 +42,7 @@ together/types/completions.py,sha256=o3FR5ixsTUj-a3pmOUzbSQg-hESVhpqrC9UD__VCqr4
 together/types/embeddings.py,sha256=J7grkYYn7xhqeKaBO2T-8XQRtHhkzYzymovtGdIUK5A,751
 together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
 together/types/files.py,sha256=-rEUfsV6f2vZB9NrFxT4_933ubsDIUNkPB-3OlOFk4A,1954
-together/types/finetune.py,sha256=1-EZ-HB1wA2fYX2Gt8u-nVPy6UgVyNQwh4aYzvo8eic,8079
+together/types/finetune.py,sha256=17IM5A__GnT6hgMClMz0vESohWI_qh5Eeq3iR9w1ODg,8704
 together/types/images.py,sha256=xnC-FZGdZU30WSFTybfGneWxb-kj0ZGufJsgHtB8j0k,980
 together/types/models.py,sha256=K9Om3cCFexy7qzRSEXUj7gpCy1CVb1hHx7MGG-hvTLw,1035
 together/types/rerank.py,sha256=qZfuXOn7MZ6ly8hpJ_MZ7OU_Bi1-cgYNSB20Wja8Qkk,1061
@@ -52,8 +52,8 @@ together/utils/api_helpers.py,sha256=RSF7SRhbjHzroMOSWAXscflByM1r1ta_1SpxkAT22iE
 together/utils/files.py,sha256=rBCwez0i0bcJIgQQsgd-ROgcakR5NfSmUreYPQoE5Nk,13005
 together/utils/tools.py,sha256=3-lXWP3cBCzOVSZg9tr5zOT1jaVeKAKVWxO2fcXZTh8,1788
 together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
-together-1.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-together-1.3.4.dist-info/METADATA,sha256=4z5uVKF141cdQiwBWGVlpBFvkMAOHb5RDExHDh9UtFg,11829
-together-1.3.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-together-1.3.4.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
-together-1.3.4.dist-info/RECORD,,
+together-1.3.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+together-1.3.5.dist-info/METADATA,sha256=4naWLEoh8icjBGlIVvJSXlNjtwFGdgKpWi-hVEXDo-E,11829
+together-1.3.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+together-1.3.5.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+together-1.3.5.dist-info/RECORD,,